Created
November 6, 2020 19:46
-
-
Save bmschmidt/eba211034510d3a79c96ed34ff8834a7 to your computer and use it in GitHub Desktop.
Code for mapping 2020 election results by subway line and ethnic data in NYC. Extraordinarily messy; it will not run as-is, and the data are loaded from all sorts of hard-coded local file paths. Most of the downloaded data comes from the City of New York; this also includes code to live-scrape the 2020 election results by election district (ED).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```{r}
library(tidyverse)
library(sf)

# Election-district polygons, read from a local shapefile export.
shapes <- st_read(
  "/drobo/Downloads/Election Districts/geo_export_2ab7b79f-931c-423a-8e71-a78b1e084d86.shp",
  stringsAsFactors = FALSE
)

# `Unit Name` values that read_election() removes with an anti-join
# (presumably administrative tally rows rather than candidates).
other_stuff <- tibble(
  `Unit Name` = c(
    "Public Counter", "Emergency", "Absentee/Military", "Federal", "Affidavit"
  )
)
#' Read one ED-level results CSV and compute per-district vote shares.
#'
#' @param fr Path to a CSV that has `AD`, `ED`, `Unit Name` and `Tally` columns.
#' @return A tibble with the input columns plus `elect_dist`, `name`, `party`,
#'   `tot` (sum of `Tally` within the district) and `share` (`Tally / tot`).
#'   Rows whose `Unit Name` appears in `other_stuff` are dropped.
read_election <- function(fr) {
  fr %>%
    read_csv() %>%
    # Build the id arithmetically so it does not depend on ED being read as
    # zero-padded text; this is the same AD * 1000 + ED convention used for
    # the 2016 table elsewhere in this file.
    mutate(elect_dist = as.numeric(AD) * 1000 + as.numeric(ED)) %>%
    anti_join(other_stuff, by = "Unit Name") %>%
    # "Name (Party)" splits into name / party / trailing empty piece; the
    # empty piece is dropped and rows without a party get NA, silently.
    separate(
      `Unit Name`, c("name", "party"), "[\\(\\)]",
      remove = FALSE, extra = "drop", fill = "right"
    ) %>%
    # The split leaves the space before "(" attached to the name.
    mutate(name = str_trim(name)) %>%
    group_by(elect_dist) %>%
    mutate(tot = sum(Tally), share = Tally / tot) %>%
    ungroup()
}
#"~/Downloads/00001100000Citywide Mayor Citywide EDLevel.csv" %>% read_election -> e1 | |
#"~/Downloads/00000500000Citywide Governor Lieutenant Governor Citywide EDLevel.csv" %>% read_election -> e2 | |
#"~/Downloads/01000200000Citywide Democratic President Citywide EDLevel.csv" %>% read_election -> e3 | |
# Presidential ED-level results CSV; used further down to build `tidied_2016`.
e2 <- read_csv(
  "/drobo/Downloads/00000100000Citywide President Vice President Citywide EDLevel.csv"
)

library(rvest)

# Landing page of the live results site; each per-AD page is linked from it.
y2020 <- read_html("https://web.enrboenyc.us/CD23464AD0.html")

# Keep only the hrefs that contain "CD".
links <- y2020 %>%
  html_nodes("a") %>%
  html_attr("href") %>%
  keep(function(href) str_detect(href, "CD"))

# "AD" plus the three characters that follow it in each link.
ADs <- str_extract(links, "AD...")

# Download and parse every linked page (one HTTP request per link).
all_pages <- map(
  links,
  function(link) read_html(str_glue("https://web.enrboenyc.us/", link))
)
```
```{r}
library(tidyverse)

# Return the first `table.underline` table of a parsed page as a data frame
# with default X1, X2, ... column names (no header row is used).
t_to_rows <- function(page) {
  page %>%
    html_nodes("table.underline") %>%
    map(function(tbl) html_table(tbl, header = FALSE)) %>%
    purrr::pluck(1)
}

# Stack the tables from every scraped page, tagging rows with their AD code.
joint <- map2_dfr(
  all_pages, ADs,
  function(page, ad) mutate(t_to_rows(page), ADE = ad)
)

# Give the positional columns meaningful "Candidate.Party" names.
joint <- joint %>%
  select(
    ED = X1,
    p_in = X2,
    Biden.Democratic = X4,
    Trump.Republican = X6,
    Trump.Conservative = X8,
    Biden.Working_Families = X10,
    Hawkins.Green = X12,
    Jorgensen.Libertarian = X14,
    Pierce.Independence = X16,
    write_in = X18,
    ADE = ADE
  )

tidied_2020 <- joint %>%
  # Drop the header rows that were read in as data.
  filter(str_detect(Biden.Democratic, "Democrat|Biden", negate = TRUE)) %>%
  pivot_longer(cols = c(-ED, -p_in, -ADE), values_to = "votes") %>%
  mutate(votes = as.numeric(votes)) %>%
  separate(name, c("cand", "party")) %>%
  mutate(
    ED = as.numeric(str_extract(ED, "[0-9]+")),
    AD = as.numeric(str_extract(ADE, "[0-9]+")),
    # NOTE(review): the 2016 table uses AD * 1000 + ED; the factor of 100 here
    # presumably relies on the scraped AD code carrying an extra trailing
    # digit -- confirm against the link format.
    elect_dist = 100 * AD + ED
  ) %>%
  # Sum a candidate's votes across party lines within each district.
  count(elect_dist, cand, wt = votes, name = "votes") %>%
  group_by(elect_dist) %>%
  mutate(share = votes / sum(votes), total = sum(votes)) %>%
  filter(!is.na(elect_dist)) %>%
  ungroup()

# Citywide totals per candidate, as a sanity check.
tidied_2020 %>% count(cand, wt = votes)
# Per-district, per-candidate vote totals and shares from the `e2` CSV.
tidied_2016 <- e2 %>%
  mutate(
    ED = as.numeric(ED),
    # The word immediately before " /" in the unit name.
    cand = str_replace(str_extract(`Unit Name`, "([A-Za-z]+) /"), " /", ""),
    # The text inside the parentheses.
    party = str_replace_all(str_extract(`Unit Name`, "\\(.*\\)"), "[()]", ""),
    votes = Tally,
    elect_dist = AD * 1000 + ED
  ) %>%
  # Rows without a parenthesised party are dropped.
  filter(!is.na(party)) %>%
  # Sum a candidate's votes across party lines within each district.
  count(elect_dist, cand, wt = votes, name = "votes") %>%
  group_by(elect_dist) %>%
  mutate(share = votes / sum(votes), total = sum(votes)) %>%
  ungroup()

# Spot-check ten random rows, then the citywide totals per candidate.
sample_n(ungroup(tidied_2016), 10)
count(tidied_2016, cand, wt = votes)
#' Spread major-candidate votes and shares into one row per election district.
#'
#' @param x A table with `elect_dist`, `cand`, `votes` and `share` columns.
#' @param year Value pasted in front of the candidate name in the new column
#'   names, giving e.g. `votes_2016Clinton` and `share_2016Trump`.
#' @return One row per `elect_dist`, with a `votes_*` and a `share_*` column
#'   for each of Clinton, Biden and Trump that appears in `x`.
reshaper <- function(x, year = 2016) {
  x %>%
    filter(cand %in% c("Clinton", "Biden", "Trump")) %>%
    select(elect_dist, cand, votes, share) %>%
    pivot_wider(
      # names_prefix is documented as a string, so coerce the numeric year.
      names_prefix = as.character(year),
      names_from = "cand",
      values_from = c(votes, share)
    )
}
# One row per election district with 2016 and 2020 votes/shares side by side.
reshaped <- full_join(
  reshaper(tidied_2016, 2016),
  reshaper(tidied_2020, 2020)
)

# Attach the results to the district polygons and lightly simplify them.
jointmap <- shapes %>%
  inner_join(reshaped) %>%
  st_simplify(dTolerance = .0001)

# Choropleth of the 2020 Biden share; values outside 15%-85% are squished
# to the ends of the colour scale.
jointmap %>%
  ggplot() +
  geom_sf(aes(fill = share_2020Biden), lwd = .03) +
  scale_fill_distiller(
    type = "div",
    limits = c(.15, .85),
    direction = 1,
    palette = 5,
    labels = scales::percent,
    oob = scales::squish
  ) +
  labs(title = "As of Nov 5, NYC prez results", caption = "Ben Schmidt") +
  theme_bw()
# Neighborhood-level table with geometry, read from a local GeoJSON
# (presumably American Community Survey data, given the file name).
acs <- read_sf("/drobo/Downloads/ACS.geojson")

# Keep the identifiers plus every column whose name ends in a capital "E",
# and reproject to EPSG:2263.
acs2 <- acs %>%
  select(
    OBJECTID, BoroName, NTACode, geogname,
    ends_with("E", ignore.case = FALSE)
  ) %>%
  st_transform(crs = 2263)

# Put the election map in the same CRS so the two layers can be joined.
jointmap <- st_transform(jointmap, crs = 2263)

# Long table of (neighborhood, column name, total), excluding columns whose
# names contain digits or any of the listed fragments.
ethnic_groups <- acs2 %>%
  st_set_geometry(NULL) %>%
  select(OBJECTID, ends_with("E", ignore.case = FALSE)) %>%
  pivot_longer(cols = -OBJECTID) %>%
  filter(
    str_detect(name, "[0-9]|male|fem|nhspE|asnnhE|mdageE", negate = TRUE)
  ) %>%
  count(OBJECTID, name, wt = value)
# For each neighborhood (OBJECTID), keep the single group with the largest
# count, together with its share of that neighborhood's total.
top_groups <- ethnic_groups %>%
  group_by(OBJECTID) %>%
  mutate(share = n / sum(n)) %>%
  arrange(-n) %>%
  slice(1)

# Every use of this table in the file spells it `top_group`, which was never
# assigned; keep both names so all references resolve.
top_group <- top_groups

# Neighborhood polygons annotated with their top group.
acs3 <- acs2 %>% inner_join(top_groups)

acs3 %>%
  ggplot() +
  geom_sf(aes(fill = name)) +
  labs(title = "Top ethnic group, by neighborhood.") +
  scale_fill_brewer(type = "div", palette = 2)
# Spatially join districts to neighborhoods and total the four vote counts
# per neighborhood, treating missing counts as zero.
combined <- jointmap %>%
  st_join(acs3) %>%
  group_by(OBJECTID) %>%
  summarize(across(
    c(votes_2020Biden, votes_2020Trump, votes_2016Clinton, votes_2016Trump),
    function(x) {
      sum(x, na.rm = TRUE)
    }
  ))

# Simplified geometry plus the Democratic share of the two-party vote in
# each year.
simp <- combined %>%
  st_simplify(dTolerance = 202) %>%
  mutate(
    biden_share = votes_2020Biden / (votes_2020Biden + votes_2020Trump),
    clinton_share = votes_2016Clinton / (votes_2016Clinton + votes_2016Trump)
  )

# Map of the 2016 Trump share of the two-party vote by neighborhood.
simp %>%
  ggplot() +
  geom_sf(
    aes(fill = votes_2016Trump / (votes_2016Trump + votes_2016Clinton)),
    lwd = 0.2
  )
# These statements need `top_group` and the `names` lookup table to exist in
# the session; `names` is assigned further down in this chunk, so that
# assignment has to be run first.

# Boxplot of the Clinton-to-Biden change by top group, coloured by borough.
simp %>%
  inner_join(top_group) %>%
  inner_join(st_set_geometry(acs3, NULL)) %>%
  inner_join(names) %>%
  ggplot() +
  geom_boxplot() +
  aes(x = `top group`, y = biden_share - clinton_share, color = BoroName) +
  scale_y_continuous(labels = scales::percent, limits = c(-.15, 0)) +
  coord_flip() +
  theme_bw() +
  labs(
    title = "Biden underperformed Clinton in NYC's immigrant neighborhoods.",
    subtitle = "Change from Clinton to Biden share of two-party vote."
  )

# Print the joined table for inspection.
simp %>%
  inner_join(top_group) %>%
  inner_join(st_set_geometry(acs3, NULL))

# Neighborhood results with ACS attributes and readable group labels.
complicated <- simp %>%
  inner_join(st_set_geometry(acs3, NULL)) %>%
  inner_join(names)

# Change in Democratic share against the 2016 share, one panel per borough.
complicated %>%
  ggplot() +
  geom_point(
    aes(x = clinton_share, y = biden_share - clinton_share, color = `top group`)
  ) +
  scale_y_continuous(limits = c(-.17, 0), labels = scales::percent) +
  facet_wrap(~BoroName) +
  scale_color_brewer(type = "qual", direction = 1) +
  theme_bw() +
  labs(
    title = "Fall in Biden support is correlated to ethnicity, not to vote share."
  )
# Ratio of Biden's to Clinton's two-party share per neighborhood, joined to
# the ACS columns matching the pattern below.
g <- complicated %>%
  # select() can only pick or rename columns, so compute the ratio first.
  mutate(delta = biden_share / clinton_share) %>%
  select(OBJECTID, delta) %>%
  inner_join(
    acs %>%
      st_set_geometry(NULL) %>%
      select(OBJECTID, matches("^(asn|hsp|wtnh|blnh).*P$")),
    by = "OBJECTID"
  )

# Plain data frame for modelling: no geometry, no id column.
d <- g %>%
  st_set_geometry(NULL) %>%
  select(-OBJECTID)

# Linear model of the log ratio on five of the ACS columns.
model <- lm(log(delta) ~ blnhP + asnnhP + hspmeP + hspsalvP + hspdomP, data = d)
summary(model)
# Subway line geometries, reprojected to EPSG:2263 (units are US survey feet,
# so the distance below is in feet).
subway <- read_sf("/drobo/Downloads/Subway Lines.geojson") %>%
  st_transform(crs = 2263)

# Install lwgeom only when it is missing, instead of reinstalling it on
# every run of the notebook.
if (!requireNamespace("lwgeom", quietly = TRUE)) {
  install.packages("lwgeom")
}

# NOTE(review): the exploratory line below referred to an object `l` that is
# not defined in the visible part of this file, so it is left commented out.
# subway %>% group_by(name) %>% summarize(l)

# One row per individual line: a segment named e.g. "A-C-E" is counted once
# for each of A, C and E, and segment lengths are summed per line.
subway_lines <- subway %>%
  mutate(line = str_split(name, "-")) %>%
  unnest(line) %>%
  group_by(line) %>%
  summarize(len = sum(as.numeric(shape_len)))

# Election districts within half a mile (5280 / 2 feet) of each line, with
# vote columns summed per line and the Trump share of the 2020 two-party vote.
subs <- jointmap %>%
  st_join(
    subway_lines,
    join = st_is_within_distance,
    dist = 5280 / 2,
    left = FALSE
  ) %>%
  group_by(line) %>%
  summarize(
    across(starts_with("votes"), function(x) sum(x, na.rm = TRUE))
  ) %>%
  mutate(share = votes_2020Trump / (votes_2020Trump + votes_2020Biden))
# Borough outlines (Staten Island excluded) used as a grey backdrop. Defined
# before the plots below, which need it.
boros <- acs %>%
  group_by(BoroName) %>%
  summarize(codes = n()) %>%
  filter(BoroName != "Staten Island") %>%
  st_transform(crs = 2263) %>%
  st_simplify(dTolerance = 30)
ggplot(boros)

# Small multiples, one panel per subway line ordered by `share`. `share` is
# the 2020 Trump share of the two-party vote, so the legend and title say so.
subs %>%
  ggplot() +
  geom_sf(data = boros, fill = "#BBBBBB", lwd = 0) +
  facet_wrap(~reorder(line, share)) +
  theme_bw() +
  scale_fill_viridis_c("Trump share") +
  geom_sf(aes(fill = share), lwd = 0) +
  labs(
    title = "Election districts on subway lines, by Trump vote share.",
    caption = "Ben Schmidt--10:00AM 2020-11-06 10:00AM"
  )

# Total 2020 Biden votes per line.
subs %>%
  group_by(line) %>%
  ggplot(aes(x = line, y = votes_2020Biden)) +
  geom_col() +
  coord_flip()
# NOTE(review): a bare `shifts` stood here, but no object of that name is
# assigned in the visible part of this file, so it is left commented out.
# shifts

# Lookup from individual subway line to its colour: one row per line after
# the two unnest() calls flatten the nested lists.
colors <- tribble(
  ~line, ~color,
  list("A", "C", "E"), "#0039A6",
  list("1", "2", "3"), "#EE352E",
  list("4", "5", "6"), "#00933C",
  list("7"), "#B933AD",
  list("B", "D", "F", "M"), "#FF6319",
  list("G"), "#6CBE45",
  list("J", "Z"), "#996633",
  list("L"), "#A7A9AC",
  list("N", "Q", "R", "W"), "#FCCC0A",
  list("S"), "#808183"
) %>%
  unnest(line) %>%
  unnest(line)

# The original pipeline ended in an unfinished `mutate(share = )` on a line
# starting with `%>%`, which does not parse.
# NOTE(review): the share is completed as n / sum(n), the same per-OBJECTID
# formula used for the top-group table -- confirm this was the intent.
ethnic_groups %>%
  filter(
    str_detect(
      name, "hspsamE|asneastE|asnsouthE|asnseastE|othnh",
      negate = TRUE
    )
  ) %>%
  group_by(OBJECTID) %>%
  mutate(share = n / sum(n)) %>%
  ungroup()
# Readable labels for the ACS column names. Note that this object shadows
# base::names as a variable.
names <- tibble(
  name = c(
    "asneastE", "asnsouthE", "blnhE", "hspdomE",
    "hspmeE", "hspprE", "hspsamE", "wtnhE"
  ),
  `top group` = c(
    "East Asian", "South Asian", "Non-hispanic Black", "Dominican/Puerto Rican",
    "Mexican", "Dominican/Puerto Rican", "South American", "White non-hispanic"
  )
)

# Bar chart of `share` per subway line, each bar filled with the colour
# looked up for that line.
subs %>%
  inner_join(colors) %>%
  ggplot() +
  geom_col() +
  aes(x = reorder(line, share), y = share, fill = color) +
  scale_fill_identity() +
  coord_flip() +
  theme_bw() +
  scale_x_discrete("Line") +
  labs(
    title = "Trump share in precincts within 1/2 mile of train tracks, by subway line"
  ) +
  scale_y_continuous(labels = scales::percent)
```
```{r}
```
```{r}
# Map of the change from the 2016 Clinton share to the 2020 Biden share in
# Brooklyn and Queens, faceted by `name`.
# NOTE(review): this expects `g` to carry BoroName, share_2020Biden,
# share_2016Clinton and name; the `g` assigned in the modelling code does not
# select those columns, so this presumably ran against an earlier `g` --
# confirm which table was meant.
g %>%
  filter(BoroName %in% c("Brooklyn", "Queens")) %>%
  ggplot() +
  geom_sf(aes(fill = share_2020Biden - share_2016Clinton), lwd = .03) +
  scale_fill_distiller(
    type = "div",
    limits = c(-.15, .15),
    direction = 1,
    palette = 5,
    labels = scales::percent,
    oob = scales::squish
  ) +
  labs(title = "As of Nov 5, NYC prez results", caption = "Ben Schmidt") +
  theme_bw() +
  facet_wrap(~name)
```
```{r}
# Scratch chunk left over from earlier iterations of the analysis.

# NOTE(review): `joint` is assigned earlier as the scraped results table, not
# a spatial object with a `share` column; this presumably predates that
# assignment -- confirm which object was meant.
joint %>%
  st_simplify(dTolerance = .0001) %>%
  ggplot() +
  geom_sf(aes(fill = share), lwd = .33) +
  scale_fill_viridis_c()

tidied_2020
tidied_2016 %>% sample_n(10)

# tidied_2016 is already restricted to rows with a party and no longer has a
# `party` column after its count() step, so the old filter on it is dropped.
tidied_2016 %>% count(cand, wt = votes)

# A stray bare `t` (which only prints the base transpose function) was
# removed here.

# Portable replacement for shelling out to `mkdir -p`.
dir.create("NYC_elections", showWarnings = FALSE, recursive = TRUE)

shapes %>% head

#pg %>% html_nodes("a") %>% html_attr("href") %>% keep(str_detect, ".*EDLevel.csv$")
#year = read_xml("https://www.vote.nyc.ny.us/html/results/2018.shtml", as_html = TRUE)
#xml_find_all(year, "")
#fr = "~/Downloads/01000200000Citywide Democratic President Citywide EDLevel.csv"

# NOTE(review): `e1` and `e3` are only created by read_election() calls that
# are commented out near the top of the file, and `e2` is assigned there as a
# raw CSV without `elect_dist`, `party` or `share`; these two statements need
# all three to be read_election() outputs.
relative <- e1 %>%
  filter(party == "Democratic") %>%
  inner_join(e2, by = c("elect_dist" = "elect_dist", party = "party")) %>%
  mutate(relative = share.x / share.y) %>%
  select(elect_dist, relative, party)

shapes %>%
  inner_join(e3 %>% filter(name == "Hillary Clinton")) %>%
  ggplot() +
  geom_sf(aes(fill = share))
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment