Last active
February 11, 2022 01:09
-
-
Save patdevinwilson/43d19f9e033538f6cddf1c6406677419 to your computer and use it in GitHub Desktop.
spatial analysis using Snowflake SQL demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH ZIP_FILTER AS ( | |
SELECT * FROM UTAH_ZIP_CODES | |
WHERE NAME != 'MOTOQUA' --ZIPCODE FOR MOTOQUA WAS ADDED BY THE STATE OF UTAH AFTER OUR BASELINE'S VINTAGE | |
), | |
VALID_REFINE AS ( | |
SELECT | |
LISTING_ID, | |
YEAR_BUILT, | |
ZIPCODE, | |
PRICE, | |
BEDROOMS, | |
BATHROOMS, | |
SQ_FT, | |
DESCRIPTION, | |
ACRES, | |
CAR_GARAGE, | |
PATIOS, | |
GARAGE_TYPE, | |
HOME_EXTERIOR, | |
ROOF_TYPE, | |
AMENITIES, | |
HEATING, | |
COOLING, | |
FLOORING, | |
ELEMENTARY, | |
JR_HIGH, | |
HIGH_SCHOOL, | |
SCHOOL_DISTRICT, | |
IFNULL(HH.GEOGRAPHY,ST_CENTROID(Z.GEOGRAPHY)) AS IMPUTE_GEOM --IMPUTED A ZIPCODE CENTROID GEOGRAPHY FOR MISSING GEOGRAPHY(LAT/LONG) OF LISTINGS | |
FROM UTAH_HOUSE_LISTINGS HH | |
LEFT JOIN ZIP_FILTER Z ON Z.ZIP5 = HH.ZIPCODE | |
), | |
UTAH_HOUSES AS ( | |
SELECT * | |
FROM VALID_REFINE HH ,UTAH_BOUNDARY U | |
WHERE ST_WITHIN(HH.IMPUTE_GEOM,U.GEOGRAPHY) --ENSURES OUR IMPUTED GEOGRAPHY/LISTING GEOGRAPHY ARE WITHIN THE STATE BOUNDARY OF UTAH | |
), | |
ALL_DIST_HWY AS --BUILDS DISTANCE TABLE FROM HIGHWAY RAMPS TO ALL HOUSES | |
(SELECT | |
R.ID, | |
H.LISTING_ID, | |
ST_Y(R.GEOGRAPHY) AS R_Y, | |
ST_X(R.GEOGRAPHY) AS R_X, | |
ST_Y(H.IMPUTE_GEOM) AS H_Y, | |
ST_X(H.IMPUTE_GEOM) AS H_X, | |
HAVERSINE(R_Y,R_X,H_Y,H_X) AS DIST --GENERATES ARC DISTANCE IN KILOMETERS BETWEEN HIGHWAY RAMPS AND HOUSES | |
FROM UTAH_HOUSES H | |
LEFT JOIN UTAH_RAMPS R ORDER BY 2,3), | |
SHORTEST_DIST_HWY AS | |
(SELECT MIN(DIST) AS MIN_HWY_DIST, LISTING_ID FROM ALL_DIST_HWY GROUP BY LISTING_ID) --SELECTS SHORTEST DISTANCE BETWEEN EACH HOUSE AND HIGHWAY RAMPS FROM HIGHWAY DISTANCES TABLE | |
, | |
ALL_DIST_SKI AS --BUILDS DISTANCE TABLE FROM RECREATIONAL SKI AREAS TO ALL HOUSES | |
(SELECT | |
S.NAME, | |
H.LISTING_ID, | |
ST_Y(ST_CENTROID(S.GEOGRAPHY)) AS S_Y, | |
ST_X(ST_CENTROID(S.GEOGRAPHY)) AS S_X, | |
ST_Y(H.IMPUTE_GEOM) AS H_Y, | |
ST_X(H.IMPUTE_GEOM) AS H_X, | |
HAVERSINE(S_Y,S_X,H_Y,H_X) AS DIST --GENERATES ARC DISTANCE IN KILOMETERS BETWEEN SKI AREA AND HOUSES | |
FROM UTAH_HOUSES H | |
LEFT JOIN UTAH_SKI_SNOWFLAKE S ORDER BY 2,3), | |
SHORTEST_DIST_SKI AS | |
(SELECT MIN(DIST) AS MIN_SKI_DIST, LISTING_ID FROM ALL_DIST_SKI GROUP BY LISTING_ID) --SELECTS SHORTEST DISTANCE FROM EACH HOUSE TO SKI AREA FROM THE SKI DISTANCES TABLE | |
SELECT --STRUCTURING ENRICHED DATA TABLE WITH | |
HH.LISTING_ID,--BASELINE RAW DATA | |
YEAR_BUILT,--BASELINE RAW DATA | |
ZIPCODE,--BASELINE RAW DATA | |
PRICE,--BASELINE RAW DATA | |
BEDROOMS,--BASELINE RAW DATA | |
BATHROOMS,--BASELINE RAW DATA | |
SQ_FT,--BASELINE RAW DATA | |
DESCRIPTION,--BASELINE RAW DATA | |
ACRES,--BASELINE RAW DATA | |
CAR_GARAGE,--BASELINE RAW DATA | |
PATIOS,--BASELINE RAW DATA | |
GARAGE_TYPE,--BASELINE RAW DATA | |
HOME_EXTERIOR,--BASELINE RAW DATA | |
ROOF_TYPE,--BASELINE RAW DATA | |
AMENITIES,--BASELINE RAW DATA | |
HEATING,--BASELINE RAW DATA | |
COOLING,--BASELINE RAW DATA | |
FLOORING,--BASELINE RAW DATA | |
ELEMENTARY,--BASELINE RAW DATA | |
JR_HIGH,--BASELINE RAW DATA | |
HIGH_SCHOOL,--BASELINE RAW DATA | |
SCHOOL_DISTRICT,--BASELINE RAW DATA | |
RANK_SCORE, --SCHOOL DISTRICT SCORE | |
TOTAL_POP,--ACS CENSUS | |
HOUSEHOLDS,--ACS CENSUS | |
MEDIAN_INCOME,--ACS CENSUS | |
HOUSING_UNITS,--ACS CENSUS | |
RENTER_OCCUPIED_HOUSING_UNITS_PAYING_CASH_MEDIAN_GROSS_RENT,--ACS CENSUS | |
OCCUPIED_HOUSING_UNITS,--ACS CENSUS | |
VACANT_HOUSING_UNITS,--ACS CENSUS | |
VACANT_HOUSING_UNITS_FOR_RENT,--ACS CENSUS | |
VACANT_HOUSING_UNITS_FOR_SALE,--ACS CENSUS | |
HOUSING_BUILT_2005_OR_LATER,--ACS CENSUS | |
HOUSING_BUILT_2000_TO_2004,--ACS CENSUS | |
HOUSING_BUILT_1939_OR_EARLIER,--ACS CENSUS | |
MEDIAN_YEAR_STRUCTURE_BUILT,--ACS CENSUS | |
OWNER_OCCUPIED_HOUSING_UNITS,--ACS CENSUS | |
MILLION_DOLLAR_HOUSING_UNITS, --ACS CENSUS | |
AGGREGATE_TRAVEL_TIME_TO_WORK, --ACS CENSUS | |
COMMUTERS_BY_PUBLIC_TRANSPORTATION, --ACS CENSUS | |
NATWALKIND as WALK_INDEX, --NATIONAL WALKABILITY SCORE | |
MIN_HWY_DIST as HWY_DISTANCE, --ARC DISTANCE TO NEAREST HIGHWAY RAMP IN KILOMETERS | |
MIN_SKI_DIST as SKI_DISTANCE, --ARC DISTANCE TO NEAREST RECREATIONAL SKI AREA IN KILOMETERS | |
ST_ASTEXT(HH.IMPUTE_GEOM) as GEOGRAPHY | |
FROM UTAH_HOUSES HH | |
LEFT JOIN "UTAH_SCHOOL_SNOWFLAKE" S ON RTRIM(DISTRICT) = REPLACE(SCHOOL_DISTRICT, 'School District', '') --JOINS SCHOOL DISTRICT SCORING, RANK_SCORE | |
LEFT JOIN "CENSUS_ACS_2018_BLOCK_GROUP" BG ON ST_CONTAINS(BG.GEOGRAPHY, HH.IMPUTE_GEOM) --SPATIALLY JOINS RELEVANT AMERICAN COMMUNITY CENSUS DATA | |
LEFT JOIN "WALK_INDEX_SNOWFLAKE" WI ON ST_CONTAINS(WI.GEOGRAPHY, HH.IMPUTE_GEOM) --SPATIALLY JOINS WALKABILTIY INDEX | |
LEFT JOIN SHORTEST_DIST_HWY HWY ON HWY.LISTING_ID = HH.LISTING_ID --JOINS THE SHORTEST HIGHWAY DISTANCE TO EACH ROW FOR HOUSE LISTING | |
LEFT JOIN SHORTEST_DIST_SKI SKI ON SKI.LISTING_ID = HH.LISTING_ID --JOINS THE SHORTEST RECREATIONAL SKI AREA DISTANCE TO EACH ROW FOR HOUSE LISTING |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment