Skip to content

Instantly share code, notes, and snippets.

@patdevinwilson
Last active February 11, 2022 01:09
Show Gist options
  • Save patdevinwilson/43d19f9e033538f6cddf1c6406677419 to your computer and use it in GitHub Desktop.
Save patdevinwilson/43d19f9e033538f6cddf1c6406677419 to your computer and use it in GitHub Desktop.
spatial analysis using Snowflake SQL demo
WITH ZIP_FILTER AS (
SELECT * FROM UTAH_ZIP_CODES
WHERE NAME != 'MOTOQUA' --ZIPCODE FOR MOTOQUA WAS ADDED BY THE STATE OF UTAH AFTER OUR BASELINE'S VINTAGE
),
VALID_REFINE AS (
SELECT
LISTING_ID,
YEAR_BUILT,
ZIPCODE,
PRICE,
BEDROOMS,
BATHROOMS,
SQ_FT,
DESCRIPTION,
ACRES,
CAR_GARAGE,
PATIOS,
GARAGE_TYPE,
HOME_EXTERIOR,
ROOF_TYPE,
AMENITIES,
HEATING,
COOLING,
FLOORING,
ELEMENTARY,
JR_HIGH,
HIGH_SCHOOL,
SCHOOL_DISTRICT,
IFNULL(HH.GEOGRAPHY,ST_CENTROID(Z.GEOGRAPHY)) AS IMPUTE_GEOM --IMPUTED A ZIPCODE CENTROID GEOGRAPHY FOR MISSING GEOGRAPHY(LAT/LONG) OF LISTINGS
FROM UTAH_HOUSE_LISTINGS HH
LEFT JOIN ZIP_FILTER Z ON Z.ZIP5 = HH.ZIPCODE
),
UTAH_HOUSES AS (
SELECT *
FROM VALID_REFINE HH ,UTAH_BOUNDARY U
WHERE ST_WITHIN(HH.IMPUTE_GEOM,U.GEOGRAPHY) --ENSURES OUR IMPUTED GEOGRAPHY/LISTING GEOGRAPHY ARE WITHIN THE STATE BOUNDARY OF UTAH
),
ALL_DIST_HWY AS --BUILDS DISTANCE TABLE FROM HIGHWAY RAMPS TO ALL HOUSES
(SELECT
R.ID,
H.LISTING_ID,
ST_Y(R.GEOGRAPHY) AS R_Y,
ST_X(R.GEOGRAPHY) AS R_X,
ST_Y(H.IMPUTE_GEOM) AS H_Y,
ST_X(H.IMPUTE_GEOM) AS H_X,
HAVERSINE(R_Y,R_X,H_Y,H_X) AS DIST --GENERATES ARC DISTANCE IN KILOMETERS BETWEEN HIGHWAY RAMPS AND HOUSES
FROM UTAH_HOUSES H
LEFT JOIN UTAH_RAMPS R ORDER BY 2,3),
SHORTEST_DIST_HWY AS
(SELECT MIN(DIST) AS MIN_HWY_DIST, LISTING_ID FROM ALL_DIST_HWY GROUP BY LISTING_ID) --SELECTS SHORTEST DISTANCE BETWEEN EACH HOUSE AND HIGHWAY RAMPS FROM HIGHWAY DISTANCES TABLE
,
ALL_DIST_SKI AS --BUILDS DISTANCE TABLE FROM RECREATIONAL SKI AREAS TO ALL HOUSES
(SELECT
S.NAME,
H.LISTING_ID,
ST_Y(ST_CENTROID(S.GEOGRAPHY)) AS S_Y,
ST_X(ST_CENTROID(S.GEOGRAPHY)) AS S_X,
ST_Y(H.IMPUTE_GEOM) AS H_Y,
ST_X(H.IMPUTE_GEOM) AS H_X,
HAVERSINE(S_Y,S_X,H_Y,H_X) AS DIST --GENERATES ARC DISTANCE IN KILOMETERS BETWEEN SKI AREA AND HOUSES
FROM UTAH_HOUSES H
LEFT JOIN UTAH_SKI_SNOWFLAKE S ORDER BY 2,3),
SHORTEST_DIST_SKI AS
(SELECT MIN(DIST) AS MIN_SKI_DIST, LISTING_ID FROM ALL_DIST_SKI GROUP BY LISTING_ID) --SELECTS SHORTEST DISTANCE FROM EACH HOUSE TO SKI AREA FROM THE SKI DISTANCES TABLE
SELECT --STRUCTURING ENRICHED DATA TABLE WITH
HH.LISTING_ID,--BASELINE RAW DATA
YEAR_BUILT,--BASELINE RAW DATA
ZIPCODE,--BASELINE RAW DATA
PRICE,--BASELINE RAW DATA
BEDROOMS,--BASELINE RAW DATA
BATHROOMS,--BASELINE RAW DATA
SQ_FT,--BASELINE RAW DATA
DESCRIPTION,--BASELINE RAW DATA
ACRES,--BASELINE RAW DATA
CAR_GARAGE,--BASELINE RAW DATA
PATIOS,--BASELINE RAW DATA
GARAGE_TYPE,--BASELINE RAW DATA
HOME_EXTERIOR,--BASELINE RAW DATA
ROOF_TYPE,--BASELINE RAW DATA
AMENITIES,--BASELINE RAW DATA
HEATING,--BASELINE RAW DATA
COOLING,--BASELINE RAW DATA
FLOORING,--BASELINE RAW DATA
ELEMENTARY,--BASELINE RAW DATA
JR_HIGH,--BASELINE RAW DATA
HIGH_SCHOOL,--BASELINE RAW DATA
SCHOOL_DISTRICT,--BASELINE RAW DATA
RANK_SCORE, --SCHOOL DISTRICT SCORE
TOTAL_POP,--ACS CENSUS
HOUSEHOLDS,--ACS CENSUS
MEDIAN_INCOME,--ACS CENSUS
HOUSING_UNITS,--ACS CENSUS
RENTER_OCCUPIED_HOUSING_UNITS_PAYING_CASH_MEDIAN_GROSS_RENT,--ACS CENSUS
OCCUPIED_HOUSING_UNITS,--ACS CENSUS
VACANT_HOUSING_UNITS,--ACS CENSUS
VACANT_HOUSING_UNITS_FOR_RENT,--ACS CENSUS
VACANT_HOUSING_UNITS_FOR_SALE,--ACS CENSUS
HOUSING_BUILT_2005_OR_LATER,--ACS CENSUS
HOUSING_BUILT_2000_TO_2004,--ACS CENSUS
HOUSING_BUILT_1939_OR_EARLIER,--ACS CENSUS
MEDIAN_YEAR_STRUCTURE_BUILT,--ACS CENSUS
OWNER_OCCUPIED_HOUSING_UNITS,--ACS CENSUS
MILLION_DOLLAR_HOUSING_UNITS, --ACS CENSUS
AGGREGATE_TRAVEL_TIME_TO_WORK, --ACS CENSUS
COMMUTERS_BY_PUBLIC_TRANSPORTATION, --ACS CENSUS
NATWALKIND as WALK_INDEX, --NATIONAL WALKABILITY SCORE
MIN_HWY_DIST as HWY_DISTANCE, --ARC DISTANCE TO NEAREST HIGHWAY RAMP IN KILOMETERS
MIN_SKI_DIST as SKI_DISTANCE, --ARC DISTANCE TO NEAREST RECREATIONAL SKI AREA IN KILOMETERS
ST_ASTEXT(HH.IMPUTE_GEOM) as GEOGRAPHY
FROM UTAH_HOUSES HH
LEFT JOIN "UTAH_SCHOOL_SNOWFLAKE" S ON RTRIM(DISTRICT) = REPLACE(SCHOOL_DISTRICT, 'School District', '') --JOINS SCHOOL DISTRICT SCORING, RANK_SCORE
LEFT JOIN "CENSUS_ACS_2018_BLOCK_GROUP" BG ON ST_CONTAINS(BG.GEOGRAPHY, HH.IMPUTE_GEOM) --SPATIALLY JOINS RELEVANT AMERICAN COMMUNITY CENSUS DATA
LEFT JOIN "WALK_INDEX_SNOWFLAKE" WI ON ST_CONTAINS(WI.GEOGRAPHY, HH.IMPUTE_GEOM) --SPATIALLY JOINS WALKABILTIY INDEX
LEFT JOIN SHORTEST_DIST_HWY HWY ON HWY.LISTING_ID = HH.LISTING_ID --JOINS THE SHORTEST HIGHWAY DISTANCE TO EACH ROW FOR HOUSE LISTING
LEFT JOIN SHORTEST_DIST_SKI SKI ON SKI.LISTING_ID = HH.LISTING_ID --JOINS THE SHORTEST RECREATIONAL SKI AREA DISTANCE TO EACH ROW FOR HOUSE LISTING
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment