-
-
Save bbroke/7047eba0c4c3223fdea1e498913a0bf2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# required libraries | |
library(XML) | |
library(uuid) | |
library(stringr) | |
library(plyr) | |
library(reshape2) | |
library(ggplot2) | |
library(doParallel) | |
doParallel::registerDoParallel(cores = 4) | |
library(maps) | |
library(mapproj) | |
f <- "https://raw.githubusercontent.com/chris-taylor/USElection/master/data/electoral-college-votes.csv" | |
electoral.college <- read.csv(f, header=FALSE) | |
names(electoral.college) <- c("state", "electoral_votes") | |
head(electoral.college) | |
# exclude D.C. from the data pull b/c there aren't any polls!. we'll add it in manually | |
states <- electoral.college$state[c(1:7, 9:51)] | |
results <- ldply(states, function(state) { | |
url <- "http://www.electionprojection.com/latest-polls/%s-presidential-polls-trump-vs-clinton-vs-johnson-vs-stein.php" | |
state.fmt <- gsub(" ", "-", tolower(state)) | |
url.state <- sprintf(url, state.fmt) | |
print(url.state) | |
r <- readHTMLTable(url.state, stringsAsFactors=FALSE)[[3]] | |
r$state <- state | |
r$id <- 1:nrow(r) | |
cols <- c("Dates", "Firm", "state", "Clinton", "Trump", "Johnson", "id") | |
r <- r[2:nrow(r),][,cols] | |
r <- melt(r, id=c("Dates", "Firm", "state", "id"), variable.name="candidate", value.name="vote") | |
names(r) <- c("date", "poll", "state", "id", "candidate", "vote") | |
r$race <- "" | |
cols <- c("date", "race", "state", "poll", "candidate", "vote", "id") | |
r <- r[,cols] | |
r$vote <- as.numeric(r$vote) | |
r | |
}) | |
# adding D.C. on manually b/c it's slightly different. it also doesn't produce material changes to the | |
# results | |
results <- rbind(results, data.frame( | |
date='10/20 - 10/28', | |
race='', | |
state='District of Columbia', | |
poll='SurveyMonkey', | |
candidate=c("Clinton", "Trump", "Johnson"), | |
vote=c(87, 5, 4), | |
id=1 | |
)) | |
head(results) | |
tail(results) | |
table(results$candidate) | |
table(results$state) | |
results <- results[order(results$state, results$id, results$candidate),] | |
poll.freq <- data.frame(table(results$state)) | |
ggplot(poll.freq, aes(x=Var1, weight=Freq)) + | |
geom_bar() + | |
coord_flip() + | |
scale_y_continuous("# of Polls") + | |
scale_x_discrete("State", limits=rev(levels(poll.freq$Var1))) | |
weight <- function(i) { | |
exp(1)*1 / exp(i) | |
} | |
w <- data.frame(poll=1:8, weight=weight(1:8)) | |
ggplot(w, aes(x=poll, weight=weight)) + | |
geom_bar() + | |
scale_x_continuous("nth poll", breaks=1:8) + | |
scale_y_continuous("weight") | |
election.sim <- function() { | |
ddply(results, .(state), function(polls.state) { | |
polls.state$.id <- NULL | |
polls.state$id <- cumsum(!duplicated(polls.state$id)) | |
polls.state$weight <- weight(polls.state$id) | |
polls.state$weighted_vote <- polls.state$vote * polls.state$weight | |
tally <- ddply(polls.state, .(candidate), function(p) { | |
r <- rnorm(nrow(p), 1, .15) | |
data.frame(weighted_vote=sum(p$weighted_vote * r)) | |
}) | |
tally <- head(tally, 3) | |
tally$estimated_popular_vote <- tally$weighted_vote / sum(tally$weighted_vote) | |
tally | |
}) | |
} | |
(election <- election.sim()) | |
colormap <- c(Clinton="#179ee0", Trump="#ff5d40", Johnson="#f6b900") | |
ggplot(election, aes(x=candidate, weight=estimated_popular_vote, fill=candidate)) + | |
geom_bar() + | |
facet_wrap(~state) + | |
scale_fill_manual(values=colormap) + | |
scale_y_continuous(labels = scales::percent, breaks=c(0, 0.25, 0.5, 0.75, 1)) + | |
theme(axis.title.x=element_blank(), | |
axis.text.x=element_blank(), | |
axis.ticks.x=element_blank(), | |
axis.title.y=element_blank()) | |
simulated.state.results <- ldply(1:10000, function(i) { | |
election <- election.sim() | |
election.results <- dcast(election, state ~ candidate, value.var="estimated_popular_vote") | |
election.results <- merge(election.results, electoral.college, by.x="state", by.y="state", all.x = TRUE) | |
candidates <- c("Clinton", "Trump", "Johnson") | |
election.results$winner <- candidates[max.col(election.results[,candidates])] | |
election.results$sim_id <- UUIDgenerate() | |
election.results | |
}, .progress="text", .parallel=TRUE) | |
simulated.elections <- ddply(simulated.state.results, .(sim_id), function(simulation) { | |
clinton <- sum(ifelse(simulation$Clinton > simulation$Trump & simulation$Clinton > simulation$Johnson, simulation$electoral_votes, 0)) | |
trump <- sum(ifelse(simulation$Clinton < simulation$Trump & simulation$Trump > simulation$Johnson, simulation$electoral_votes, 0)) | |
johnson <- sum(ifelse(simulation$Johnson > simulation$Trump & simulation$Johnson > simulation$Trump, simulation$electoral_votes, 0)) | |
data.frame( | |
clinton=clinton, | |
trump=trump, | |
johnson=johnson, | |
winner=ifelse(clinton > trump, "Clinton", "Trump") | |
) | |
}, .progress="text", .parallel=TRUE) | |
table(simulated.elections$winner) | |
table(simulated.elections$winner) / nrow(simulated.elections) | |
summary(simulated.elections$clinton) | |
summary(simulated.elections$trump) | |
summary(simulated.elections$johnson) | |
head(simulated.elections) | |
ggplot(melt(simulated.elections[,1:4], id.vars = "sim_id"), aes(x=value, fill=variable)) + | |
geom_histogram(position="identity", alpha=0.7) + | |
scale_fill_manual(values=c(clinton="#179ee0", trump="#ff5d40", johnson="#f6b900")) | |
ggplot(melt(simulated.elections[,1:3], id.vars = "sim_id"), aes(x=value, fill=variable)) + | |
geom_histogram(position="identity", alpha=0.7) + | |
scale_fill_manual(values=c(clinton="#179ee0", trump="#ff5d40", johnson="#f6b900")) | |
ggplot(melt(simulated.elections[,1:3], id.vars = "sim_id"), aes(x=value, fill=variable)) + | |
geom_density(position="identity", alpha=0.7) + | |
scale_fill_manual(values=c(clinton="#179ee0", trump="#ff5d40", johnson="#f6b900")) | |
# popular vote | |
clinton <- sum(simulated.elections$clinton * simulated.elections$electoral_votes) | |
trump <- sum(simulated.elections$trump * simulated.elections$electoral_votes) | |
johnson <- sum(simulated.elections$johnson * simulated.elections$electoral_votes) | |
total <- sum(clinton, trump, johnson) | |
data.frame(Clinton=clinton/total, Trump=trump/total, Johnson=johnson/total) | |
simulated.state.results.agg <- ddply(simulated.state.results, .(state), function(state) { | |
data.frame( | |
state=state$state[1], | |
trump=sum(state$winner=="Trump") / nrow(state), | |
clinton=sum(state$winner=="Clinton") / nrow(state), | |
johnson=sum(state$winner=="Johnson") / nrow(state), | |
electoral_votes=state$electoral_votes[1], | |
n=nrow(state) | |
) | |
}) | |
simulated.state.results.agg | |
data.frame( | |
Trump=sum(simulated.state.results.agg$electoral_votes * simulated.state.results.agg$trump), | |
Clinton=sum(simulated.state.results.agg$electoral_votes * simulated.state.results.agg$clinton), | |
Johnson=sum(simulated.state.results.agg$electoral_votes * simulated.state.results.agg$johnson) | |
) | |
us.states <- map_data("state") | |
simulated.state.results.agg$state.mergecol <- as.character(tolower(simulated.state.results.agg$state)) | |
state.plot <- merge(us.states, simulated.state.results.agg, by.x="region", by.y="state.mergecol") | |
table(us.states$region) | |
state.plot$winner <- ifelse(state.plot$trump > state.plot$clinton, "Trump", "Clinton") | |
# binary outcomes for states | |
ggplot(state.plot, aes(x=long, y=lat, group=group, fill=winner)) + | |
geom_polygon(colour="white") + | |
scale_fill_manual(values=colormap) + | |
coord_map() | |
# shaded outcomes for states | |
ggplot(state.plot, aes(x=long, y=lat)) + | |
geom_polygon(aes(group=group, fill=trump), colour="grey10") + | |
scale_fill_gradient2("", low=colormap["Clinton"], mid="white", high=colormap["Trump"], midpoint=0.5, | |
breaks=c(0, 0.5, 1), labels=c("Clinton", "?", "Trump")) + | |
theme_minimal() + | |
theme(axis.title.x=element_blank(), | |
axis.text.x=element_blank(), | |
axis.ticks.x=element_blank(), | |
axis.title.y=element_blank(), | |
axis.text.y=element_blank(), | |
axis.ticks.y=element_blank(), | |
legend.text=element_text(size=12, face="bold")) + | |
coord_map() | |
r <- "https://gist.githubusercontent.com/bbroke/5880cd1bc785f8e2e153147d49ca63dd/raw/4fc0e7ecf9d47ad26a6260e209dea0d397c93455/regions.csv" | |
us.regions <- read.csv(r) | |
state.plot <- merge(state.plot, us.regions, by.x="state", by.y="state") | |
head(state.plot) | |
# http://www.census.gov/econ/census/help/geography/regions_and_divisions.html | |
# http://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf | |
ddply(state.plot, .(division), function(div) { | |
p <- ggplot(div, aes(x=long, y=lat)) + | |
geom_polygon(aes(group=group, fill=trump), colour="grey10") + | |
scale_fill_gradient2("", low=colormap["Clinton"], mid="white", high=colormap["Trump"], midpoint=0.5, | |
guide=FALSE) + | |
theme_minimal() + | |
theme(axis.title.x=element_blank(), | |
axis.text.x=element_blank(), | |
axis.ticks.x=element_blank(), | |
axis.title.y=element_blank(), | |
axis.text.y=element_blank(), | |
axis.ticks.y=element_blank(), | |
legend.text=element_text(size=12, face="bold")) + | |
coord_map() + | |
ggtitle(div$division[1]) | |
filename <- paste0("election-region", tolower(str_replace_all(div$division[1], " ", "-")), ".png") | |
print(p) | |
ggsave(filename) | |
NULL | |
}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment