Last active
August 15, 2023 00:43
-
-
Save rchardptrsn/22092a40a1d0f3a8e85b7c376e6b6835 to your computer and use it in GitHub Desktop.
read geojson.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import geopandas as gpd

# Read in the dataset from geojson file to geopandas dataframe
gdf = gpd.read_file('WFIGS_-_2022_Wildland_Fire_Perimeters_to_Date.geojson')

# Print a description of the dataframe.
# info() writes its report to stdout itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line after the report).
gdf.info()

# Check for NaN records
# Result: geometry does not have any NaN but many other columns do
print(f'Columns with nan records: {gdf.columns[gdf.isna().any()].tolist()}')

# Count NULLs in poly_IncidentName
# Result: 20
print(f'Count of Nulls in poly_IncidentName: {gdf.poly_IncidentName.isnull().sum()}')

# Identify duplicate records: keep=False flags every row whose
# poly_IncidentName occurs more than once, so summing the boolean mask gives
# the number of rows involved in any duplicate group.
print(f"Number of duplicate records: {gdf.duplicated(['poly_IncidentName'], keep=False).sum()}")

# Drop duplicate values in the gdf dataframe.
# Sorting by SHAPE_Area descending first means drop_duplicates (which keeps the
# first occurrence) retains the largest-area row per incident name; sort_index
# then restores the original row order.
# NOTE(review): pandas appears to treat missing values as equal to each other
# here, so all rows with a null poly_IncidentName would collapse to a single
# row -- confirm that is intended.
gdf = (
    gdf.sort_values('SHAPE_Area', ascending=False)
    .drop_duplicates('poly_IncidentName')
    .sort_index()
)

# Test to make sure the duplicate values were dropped (expected count: 0)
print(f"Number of duplicate records: {gdf.duplicated(['poly_IncidentName'], keep=False).sum()}")

# Subset by only rows where poly_FeatureCategory = 'Wildfire Final Fire Perimeter'
gdf = gdf[gdf['poly_FeatureCategory'] == 'Wildfire Final Fire Perimeter']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment