-
-
Save ttmmghmm/d45538769c51239fe56b to your computer and use it in GitHub Desktop.
dplyr ems
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(magrittr) | |
library(ggplot2) | |
library(dplyr) | |
library(tidyr) | |
library(scales) | |
# get page | |
pg <- html("http://www.bls.gov/opub/ted/2015/consumer-spending-by-age-group-in-2013.htm#tab-2") | |
# extract table | |
pg %>% | |
html_nodes("table") %>% | |
extract2(1) %>% | |
html_table(header=TRUE) %>% | |
rename(spending_category=`Spending category`) %>% | |
filter(spending_category != "Total") %>% | |
gather(age_group, value, -spending_category) %>% | |
mutate(label=percent(value/100)) -> spending | |
# for facet ordering | |
spending %>% | |
group_by(spending_category) %>% | |
summarise(μ=mean(value)) %>% | |
arrange(μ) %>% .$spending_category -> cat_levels | |
# plot | |
spending %>% | |
mutate(spending_category=factor(spending_category, | |
levels=cat_levels, ordered=TRUE)) %>% | |
ggplot(aes(x=age_group, y=value)) + | |
geom_bar(stat="identity", aes(fill=spending_category)) + | |
geom_text(aes(y=value+5, label=label), size=2.5) + | |
scale_x_discrete(expand=c(0, 0)) + | |
scale_y_continuous(expand=c(0, 0), limits=c(0, max(spending$value)+10)) + | |
labs(x=NULL, y=NULL, title="Consumer spending by age group in 2013\n") + | |
facet_wrap(~spending_category) + | |
coord_flip() + | |
theme(panel.background=element_rect(fill="#f0f0f0")) + | |
theme(panel.grid=element_blank()) + | |
theme(panel.border=element_blank()) + | |
theme(panel.margin=unit(1, "lines")) + | |
theme(strip.background=element_blank()) + | |
theme(strip.text=element_text(size=6.5)) + | |
theme(axis.ticks=element_blank()) + | |
theme(axis.text.x=element_blank()) + | |
theme(legend.position="none") -> gg | |
# save | |
ggsave("bls_spending.svg", width=9, height=6) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://www.r-bloggers.com/animated-us-hexbin-map-of-the-avian-flu-outbreak/ | |
# To make an animated map of cumulative flock totals by week | |
library(rvest) # scraping | |
library(stringr) # string manipulation | |
library(lubridate) # date conversion | |
library(dplyr) # data mjnging | |
library(zoo) # for locf | |
library(ggplot2) # plotting | |
library(rgdal) # map stuff | |
library(rgeos) # map stuff | |
"http://www.aphis.usda.gov/wps/portal/aphis/ourfocus/animalhealth/sa_animal_disease_information/sa_avian_health/ct_avian_influenza_disease/!ut/p/a1/lVJbb4IwFP41e1qwFZDLI-oUnGgyswm8kAMUaAaFQNG4X7-ibnEPYtakDz3nO_kupyhAHgoYHGgGnFYMiv4daOFqa8vjKZad5c58wc7mY-Eaa13Z2qoA-AKA7xwL_53fvjpaP_-Gp_Z8jHcK2qMABTHjNc-RD3VO2zCuGCeMhwWNGmhOT7iFsOqaMK3irj2_gNESijAnUPD8tpLQlkBLQsrSqinPJi7tAwX2i4_5tSBgRUfYF_wM9mLqmCbIj2QzxZpMJMUYg6TGkSLBBCaSPEnSJIljXVH0q_kBdw_CO5sXkNnSslV9LQJTDRk7czGumy7GjnYFDOTrCw36XRJTRbt_mlo9VD1FgfuctqrVByA37szNBAPwXOpzR97gPi7tm30gb2AfQkxWVJH4ifvZLavFIsUQrA1JSUOaUV61HHnH43HUtQmMsuqA6vK9NJST9JluNlKwyPr7DT6YvRs!/?1dmy&urile=wcm%3apath%3a%2Faphis_content_library%2Fsa_our_focus%2Fsa_animal_health%2Fsa_animal_disease_information%2Fsa_avian_health%2Fsa_detections_by_states%2Fct_ai_pacific_flyway" %>% | |
#' read in the data, extract the table and clean up the fields | |
#' also clean up the column names to since they are fairly nasty | |
html -> pg | |
pg %>% | |
html_nodes("table") %>% | |
# two tables in the code and we only need the first one. | |
magrittr::extract2(1) %>% | |
html_table(header=TRUE) %>% | |
filter(`Flock size`!="pending") %>% | |
# inconsistent in the values used for various columns. | |
# scan the rendered table on the USDA page by eye - the column names are horrible | |
# commas in the flock counts - handy to have the date as an actual date type | |
mutate(Species=str_replace(tolower(Species), "s$", ""), | |
`Avian influenza subtype*`=str_replace_all(`Avian influenza subtype*`, " ", ""), | |
`Flock size`=as.numeric(str_replace_all(`Flock size`, ",", "")), | |
`Confirmation date`=as.Date(mdy(`Confirmation date`))) %>% | |
rename(state=State, county=County, flyway=Flyway, flock_type=`Flock type`, | |
species=Species, subtype=`Avian influenza subtype*`, date=`Confirmation date`, | |
flock_size=`Flock size`) -> birds | |
# To make an animated map of cumulative flock totals by week | |
birds %>% | |
mutate(week=as.numeric(format(birds$date, "%Y%U"))) %>% | |
arrange(week) %>% | |
group_by(week, state) %>% | |
tally(flock_size) %>% | |
group_by(state) %>% | |
# calculate the cumulative sums | |
mutate(cum=cumsum(n)) %>% | |
ungroup %>% | |
select(week, state, cum) %>% | |
mutate(week=as.Date(paste(week, 1), "%Y%U %u")) %>% | |
# fill in the gaps where there are missing state/week combinations | |
left_join(tidyr::expand(., week, state), .) %>% | |
# carry the last observations by state/week forward in this expanded data frame | |
group_by(state) %>% | |
do(na.locf(.)) %>% | |
# make breaks for data ranges so we can more intelligently map them to colors | |
mutate(state_abb=state.abb[match(state, state.name)], | |
cum=as.numeric(ifelse(is.na(cum), 0, cum)), | |
brks=cut(cum, | |
breaks=c(0, 200, 50000, 1000000, 10000000, 50000000), | |
labels=c("1-200", "201-50K", "50k-1m", | |
"1m-10m", "10m-50m"))) -> by_state_and_week | |
# standard animation steps: | |
# determine where we’re going to break the data up | |
# feed that into a loop | |
# partition the data in the loop | |
# render the plot to a file | |
# combine all the individual images into an animation | |
i <- 0 | |
for (wk in unique(by_state_and_week$week)) { | |
# filter by week | |
by_state_and_week %>% filter(week==wk) -> this_wk | |
# hack to let us color the state labels in white or black depending on | |
# the value of the fill | |
this_wk %>% | |
filter(brks %in% c("1m-10m", "10m-50m")) %>% | |
.$state_abb %>% | |
unique -> white_states | |
centers %>% | |
mutate(txt_col="black") %>% | |
mutate(txt_col=ifelse(id %in% white_states, "white", "black")) -> centers | |
# setup the plot | |
gg <- ggplot() | |
gg <- gg + geom_map(data=us_map, map=us_map, | |
aes(x=long, y=lat, map_id=id), | |
color="white", fill="#dddddd", size=2) | |
gg <- gg + geom_map(data=this_wk, map=us_map, | |
aes(fill=brks, map_id=state_abb), | |
color="white", size=2) | |
gg <- gg + geom_text(data=centers, | |
aes(label=id, x=x, y=y, color=txt_col), size=4) | |
gg <- gg + scale_color_identity() | |
gg <- gg + scale_fill_brewer(name="Combined flock sizen(all types)", | |
palette="RdPu", na.value="#dddddd", drop=FALSE) | |
gg <- gg + guides(fill=guide_legend(override.aes=list(colour=NA))) | |
gg <- gg + coord_map() | |
gg <- gg + labs(x=NULL, y=NULL, | |
title=sprintf("U.S. Avian Flu Total Impact as of %sn", wk)) | |
gg <- gg + theme_bw() | |
gg <- gg + theme(plot.title=element_text(face="bold", hjust=0, size=24)) | |
gg <- gg + theme(panel.border=element_blank()) | |
gg <- gg + theme(panel.grid=element_blank()) | |
gg <- gg + theme(axis.ticks=element_blank()) | |
gg <- gg + theme(axis.text=element_blank()) | |
gg <- gg + theme(legend.position="bottom") | |
gg <- gg + theme(legend.direction="horizontal") | |
gg <- gg + theme(legend.title.align=1) | |
print(gg) | |
i <- i + 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment