Skip to content

Instantly share code, notes, and snippets.

@rchardptrsn
Last active August 15, 2023 00:43
Show Gist options
  • Save rchardptrsn/22092a40a1d0f3a8e85b7c376e6b6835 to your computer and use it in GitHub Desktop.
Save rchardptrsn/22092a40a1d0f3a8e85b7c376e6b6835 to your computer and use it in GitHub Desktop.
read geojson.py
import geopandas as gpd
# Load the 2022 wildland fire perimeters, report basic data-quality figures,
# keep one polygon per named incident (the one with the largest SHAPE_Area),
# and finally keep only the final fire perimeters.


def _count_duplicate_names(frame):
    """Return how many rows share a non-null poly_IncidentName with another row.

    Rows whose name is missing are not counted: pandas treats NaN values as
    equal to each other, but an unnamed perimeter is not a duplicate of
    another unnamed perimeter.
    """
    has_name = frame['poly_IncidentName'].notna()
    shares_name = frame.duplicated(['poly_IncidentName'], keep=False)
    return int((has_name & shares_name).sum())


# Read in the dataset from geojson file to geopandas dataframe
gdf = gpd.read_file('WFIGS_-_2022_Wildland_Fire_Perimeters_to_Date.geojson')

# Print a description of the dataframe.
# info() writes its report to stdout itself and returns None, so it must not
# be wrapped in print() (that would emit an extra "None" line).
gdf.info()

# Check for NaN records
# Result: geometry does not have any NaN but many other columns do
print(f'Columns with nan records: {gdf.columns[gdf.isna().any()].tolist()}')

# Count NULLs in poly_IncidentName
# Result: 20
print(f'Count of Nulls in poly_IncidentName: {gdf.poly_IncidentName.isnull().sum()}')

# Identify duplicate records (named incidents that appear more than once)
print(f"Number of duplicate records: {_count_duplicate_names(gdf)}")

# Drop duplicate incidents, keeping the polygon with the largest SHAPE_Area.
# Sorting by area first makes the "first occurrence" of each name the largest
# one. Rows with a missing name are exempted from the drop; otherwise all of
# them would be collapsed into a single row.
largest_first = gdf.sort_values('SHAPE_Area', ascending=False)
redundant = (
    largest_first['poly_IncidentName'].notna()
    & largest_first.duplicated(['poly_IncidentName'], keep='first')
)
gdf = largest_first[~redundant].sort_index()

# Test to make sure the duplicate values were dropped (expected: 0)
print(f"Number of duplicate records: {_count_duplicate_names(gdf)}")

# Subset by only rows where poly_FeatureCategory = 'Wildfire Final Fire Perimeter'
# NOTE(review): de-duplication runs before this filter, so an incident whose
# largest polygon is not a final perimeter is removed entirely - confirm that
# this ordering is intended.
gdf = gdf[gdf['poly_FeatureCategory'] == 'Wildfire Final Fire Perimeter']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment