Skip to content

Instantly share code, notes, and snippets.

@rchardptrsn
rchardptrsn / compose_sql_server.yaml
Last active July 27, 2024 18:08
Docker compose file for SQL Server 2022
services:
mssql:
container_name: mssql-db
hostname: mssql-db
image: mcr.microsoft.com/mssql/server:2022-latest
environment:
ACCEPT_EULA: 'Y'
MSSQL_SA_PASSWORD: 'Passw0rd'
MSSQL_DATA_DIR: /var/opt/mssql/data
@rchardptrsn
rchardptrsn / analyze NDVI.py
Created August 15, 2023 02:12
analyze NDVI
# Extract Year and Month
fireNDVI['Year'] = fireNDVI.Date.str.split(pat='-',expand=True)[0]
fireNDVI['Month'] = fireNDVI.Date.str.split(pat='-',expand=True)[1]
# filter to June, July, August
fireNDVI = fireNDVI[fireNDVI.Month.isin(['06','07','08'])]
# find average of 3 months for 2021 for object id
# then find average of 3 months for 2022 for object id
# subtract 2022-2021
@rchardptrsn
rchardptrsn / combine files.py
Last active August 20, 2023 01:29
combine files.py
import glob as glob
import pandas as pd
#define path to CSV files
path = r'ndvi data/*.csv'
#identify all CSV files
all_files = glob.glob(path)
#merge all CSV files into one DataFrame
fireNDVI = pd.concat((pd.read_csv(f) for f in all_files), ignore_index=True)
@rchardptrsn
rchardptrsn / download NDVI.py
Created August 14, 2023 04:51
download NDVI
import timeit
import sys
import json
import ee
# Authenticate to google earth engine
service_account = 'your-acct@your-project.iam.gserviceaccount.com'
key_path = 'path_to_your_service_account_key.json'
credentials = ee.ServiceAccountCredentials(service_account, key_path)
ee.Initialize(credentials)
@rchardptrsn
rchardptrsn / read_geojson.py
Last active August 15, 2023 00:43
read geojson.py
import geopandas as gpd
# Read in the dataset from geojson file to geopandas dataframe
gdf = gpd.read_file('WFIGS_-_2022_Wildland_Fire_Perimeters_to_Date.geojson')
# print a description of the dataframe
print(gdf.info())
# Check for NaN records
# Result: geometry does not have any NaN but many other columns do
print(f'Columns with nan records: {gdf.columns[gdf.isna().any()].tolist()}')
@rchardptrsn
rchardptrsn / county totals with geom.sql
Created April 17, 2022 16:31
county totals with geom.
SELECT f.county, f.well_count, co.geom -- pgAdmin will recognize co.geom
FROM ( -- subquery to get totals by county
SELECT county, COUNT(county) well_count
FROM fracking
GROUP BY county
) f
LEFT JOIN co_counties co -- all from f, matching from co
ON f.county = co.county
@rchardptrsn
rchardptrsn / calculate percentiles.sql
Created April 17, 2022 16:30
calculate percentiles in postgresql
SELECT
PERCENTILE_CONT(0.25) WITHIN GROUP(ORDER BY f.well_count) percentile25,
PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY f.well_count) percentile50,
PERCENTILE_CONT(0.75) WITHIN GROUP(ORDER BY f.well_count) percentile75,
PERCENTILE_CONT(1.0) WITHIN GROUP(ORDER BY f.well_count) percentile100
FROM (
SELECT COUNT(county) well_count
FROM fracking
GROUP BY county
) f
@rchardptrsn
rchardptrsn / calculate statistics.sql
Created April 17, 2022 16:28
calculate statistics in postgresql
-- calculate statistics on the totals
SELECT
SUM(f.well_count) n_obs,
COUNT(f.well_count) n_counties,
ROUND(AVG(f.well_count),2) mean,
MIN(f.well_count) min,
MAX(f.well_count) max,
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY f.well_count) median,
ROUND(STDDEV(f.well_count),2) as stdev
-- calculate group totals
@rchardptrsn
rchardptrsn / create temp table from spatial join.sql
Created April 17, 2022 16:27
create temp table from spatial join
CREATE TEMP TABLE fracking AS
SELECT f.name, f.operator, f.status, f.type, co.county
FROM fractracker f
JOIN co_counties co
ON ST_COVEREDBY(f.geom, co.geom);
-- query on TEMP TABLE to find count per county
SELECT county, COUNT(county) well_count
FROM fracking
GROUP BY county
import geopandas as gpd
from sqlalchemy import create_engine
# create the sqlalchemy connection engine
db_connection_url = "postgresql://user:password@localhost:5432/gis_db"
con = create_engine(db_connection_url)
# read in the data
gdf = gpd.read_file('shapefile/holc_ad_data.shp')