Last active
April 15, 2020 09:12
-
-
Save anglilian/4877735ae14dfea80bf7bdc7956fb98b to your computer and use it in GitHub Desktop.
CS112 Final Project Extension Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#leave-two-out test + matching | |
# Clear the workspace | |
rm(list = ls()) | |
#load the required libraries | |
#Run the commented code to install the libraries | |
#install.packages("Synth") | |
#install.packages("gtools") | |
library(Synth) | |
library(gtools) | |
# Please set your working directory to the data/ folder | |
#setwd() | |
# Load data | |
#download the data from here -> https://doi.org/10.7910/DVN/0XOYTG/RIYVNR | |
df <- read.csv("df.csv", header = TRUE) | |
# Prepare dataset | |
df$state <- as.character(df$state) # required by dataprep() | |
#main model of Synthetic control + implementing better matching | |
dataprep.out3 <- | |
dataprep(df, | |
predictors = c("state.gdp.capita", | |
"state.gdp.growth.percent", | |
"population.projection.ln", | |
"years.schooling.imp" | |
), | |
special.predictors = list( | |
list("homicide.rates", 1993:1998, "mean"), | |
list("proportion.extreme.poverty", 1993:1998, "mean"), | |
list("gini.imp", 1993:1998, "mean") | |
), | |
predictors.op = "mean", | |
dependent = "homicide.rates", | |
unit.variable = "code", | |
time.variable = "year", | |
unit.names.variable = "state", | |
treatment.identifier = 35, | |
controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53), | |
time.predictors.prior = c(1993:1998), | |
time.optimize.ssr = c(1993:1998), | |
time.plot = c(1993:2009) | |
) | |
# Run synth | |
synth.out3 <- synth(dataprep.out3) | |
#leave-two-out analysis | |
# Loop over leave two outs | |
#this is to store the results of year values for each iteration | |
#there are 6 combinations, hence 10 columns | |
storegaps <- matrix(NA, length(1993:2009), 10) | |
#colnames stores the codes of the states left out | |
colnames(storegaps) <- c("14, 32", "14, 33", "14, 42", "14, 53", | |
"32, 33", "32, 42", "32, 53", | |
"33, 42", "33, 53", | |
"42, 53") | |
#this contains the codes for all states except Sao Paulo | |
co <- unique(df$code) | |
co <- co[-25] | |
#a list of all the combinations of the 5 states | |
comb.omit <- combinations(5, 2, c(14, 32, 33, 42, 53)) | |
for(k in 1:10){ | |
# Data prep for training model | |
omit <- comb.omit[k, ] | |
# Prepare data for synth | |
dataprep.out2 <- | |
dataprep(df, | |
predictors = c("state.gdp.capita", | |
"state.gdp.growth.percent", | |
"population.projection.ln", | |
"years.schooling.imp" | |
), | |
special.predictors = list( | |
list("homicide.rates", 1993:1998, "mean"), | |
list("proportion.extreme.poverty", 1993:1998, "mean"), | |
list("gini.imp", 1993:1998, "mean") | |
), | |
predictors.op = "mean", | |
dependent = "homicide.rates", | |
unit.variable = "code", | |
time.variable = "year", | |
unit.names.variable = "state", | |
treatment.identifier = 35, | |
controls.identifier = co[-c(which(co==omit[1]), which(co==omit[2]))], | |
time.predictors.prior = c(1993:1998), | |
time.optimize.ssr = c(1993:1998), | |
time.plot = c(1993:2009) | |
) | |
# Run synth | |
synth.out2 <- synth(dataprep.out2) | |
#Store the results | |
storegaps[,k] <- (dataprep.out2$Y0%*%synth.out2$solution.w) | |
} # Close loop over leave one outs | |
# Leave-two-out: graph | |
#Plot the main model with Sao Paulo and Synthetic control + better matching | |
path.plot(synth.res = synth.out3, | |
dataprep.res = dataprep.out3, | |
Ylab = c("Homicide Rates"), | |
Xlab = c("Year"), | |
Legend = c("Sao Paulo","Synthetic Sao Paulo"), | |
Legend.position = c("bottomleft") | |
) | |
#add the line at treatment | |
abline(v = 1999, | |
lty = 2) | |
#add the arrow and text to indicate policy change | |
arrows(1997, 50, 1999, 50, | |
col = "black", | |
length = .1) | |
text(1995, 50, | |
"Policy Change", | |
cex = .8) | |
#plot all the leave 2 out synthetic controls | |
for(i in 1:10){ | |
lines(1993:2009, | |
storegaps[,i], | |
col = "darkgrey", | |
lty = "solid") | |
} | |
#add the legend | |
legend(x = "bottomleft", | |
legend = c("Sao Paulo", | |
"Synthetic Sao Paulo", | |
"Synthetic Sao Paulo (leave-two-out)" | |
), | |
lty = c("solid", "dashed", "solid"), | |
col = c("black", "black", "darkgrey"), | |
cex = .8, | |
bg = "white", | |
lwdc(2, 2, 1) | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clear the workspace | |
rm(list = ls()) | |
# Load necessary packages | |
library(dplyr) # data manipulation | |
library(Synth) # models | |
# Load data | |
df <- read.csv("df.csv", header = TRUE) | |
# Prepare dataset | |
df$state <- as.character(df$state) # required by dataprep() | |
# Prepare data for synth | |
dataprep.out <- | |
dataprep(df, | |
predictors = c("state.gdp.capita", | |
"population.projection.ln", | |
"years.schooling.imp" | |
), | |
special.predictors = list( | |
list("state.gdp.growth.percent", 1993:1998, "mean"), | |
list("homicide.rates", 1990:1998, "mean"), | |
list("proportion.extreme.poverty", 1990:1998, "mean"), | |
list("gini.imp", 1990:1998, "mean") | |
), | |
predictors.op = "mean", | |
dependent = "homicide.rates", | |
unit.variable = "code", | |
time.variable = "year", | |
unit.names.variable = "state", | |
treatment.identifier = 35, | |
controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53), | |
time.predictors.prior = c(1990:1998), | |
time.optimize.ssr = c(1990:1998), | |
time.plot = c(1990:2009) | |
) | |
## Permutation test | |
states <- c(11:17, 21:27, 31:33, 35, 41:43, 50:53) | |
# Prepare data for synth | |
results <- list() | |
results_synth <- list() | |
gaps <- list() | |
for (i in states) { | |
dataprep.out <- | |
dataprep(foo=df, | |
predictors = c("state.gdp.capita", | |
"population.projection.ln", | |
"years.schooling.imp" | |
), | |
special.predictors = list( | |
list("state.gdp.growth.percent", 1993:1998, "mean"), #shortened pretreatment year from 2000 to 2003 | |
list("homicide.rates", 1990:1998, "mean"), | |
list("proportion.extreme.poverty", 1990:1998, "mean"), | |
list("gini.imp", 1990:1998, "mean") | |
), | |
predictors.op = "mean", | |
dependent = "homicide.rates", | |
unit.variable = "code", | |
time.variable = "year", | |
unit.names.variable = "state", | |
treatment.identifier = i, | |
controls.identifier = states[which(states!=i)], | |
time.predictors.prior = c(1990:1998), | |
time.optimize.ssr = c(1990:1998), | |
time.plot = c(1990:2009) | |
) | |
results[[as.character(i)]] <- dataprep.out | |
results_synth[[as.character(i)]] <- synth(results[[as.character(i)]]) | |
gaps[[as.character(i)]] <- results[[as.character(i)]]$Y1plot - (results[[as.character(i)]]$Y0plot %*% results_synth[[as.character(i)]]$solution.w) | |
} | |
#Calculate MSPE | |
MSE <- c() | |
for (i in c(1:25)){ | |
year <- as.numeric(unlist(gaps[i])) | |
MSE[i]<- mean(year[1:9]^2) | |
} | |
low.mspe <- MSE[-which(MSE > 2*MSE[18],)] | |
# Permutation graph: states with MSPE no higher than 2x São Paulo's | |
selected_states <- match(low.mspe, MSE) | |
plot(1990:2009, | |
ylim = c(-30, 30), | |
xlim = c(1990,2009), | |
ylab = "Gap in Homicide Rates", | |
xlab = "Year" | |
) | |
for (i in selected_states) { | |
lines(1990:2009, | |
gaps[[i]], | |
col = "lightgrey", | |
lty = "solid", | |
lwd = 2 | |
) | |
} | |
lines(1990:2009, | |
gaps[["35"]], # São Paulo | |
col = "black", | |
lty = "solid", | |
lwd = 2 | |
) | |
abline(v = 1999, | |
lty = 2) | |
abline(h = 0, | |
lty = 1, | |
lwd = 1) | |
arrows(1997, 25, 1999, 25, | |
col = "black", | |
length = .1) | |
text(1995, 25, | |
"Policy Change", | |
cex = .8) | |
legend(x = "bottomleft", | |
legend = c("Sao Paulo", | |
"Control States (MSPE Less Than Two Times That of Sao Paulo)"), | |
lty = c("solid", "solid"), | |
col = c("black", "darkgrey"), | |
cex = .8, | |
bg = "white", | |
lwdc(2, 2, 1) | |
) | |
#Calculate MSPE | |
MSE <- c() | |
for (i in selected_states){ | |
year <- as.numeric(unlist(gaps[i])) | |
MSE[i]<- mean(year[10:20]^2) | |
} | |
MSE <- MSE[-which(is.na(MSE),)] | |
order(MSE) | |
MSE[18] | |
#Sao Paulo index 18 | |
pvalue <- 5/length(MSE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################### | |
### Data Analysis ### | |
##################### | |
## Please set your working directory to the data/ folder | |
# Clear the workspace | |
rm(list = ls()) | |
# Load necessary packages | |
library(dplyr) # data manipulation | |
library(Synth) # models | |
# Load data | |
df <- read.csv("df.csv", header = TRUE) | |
# Prepare dataset | |
df$state <- as.character(df$state) # required by dataprep() | |
# Prepare data for synth | |
dataprep.out <- | |
dataprep(df, | |
predictors = c("state.gdp.capita", | |
"population.projection.ln", | |
"years.schooling.imp" | |
), | |
special.predictors = list( | |
list("state.gdp.growth.percent", 1993:1998, "mean"), #shortened pretreatment year from 2000 to 2003 | |
list("homicide.rates", 1990:1998, "mean"), | |
list("proportion.extreme.poverty", 1990:1998, "mean"), | |
list("gini.imp", 1990:1998, "mean") | |
), | |
predictors.op = "mean", | |
dependent = "homicide.rates", | |
unit.variable = "code", | |
time.variable = "year", | |
unit.names.variable = "state", | |
treatment.identifier = 35, | |
controls.identifier = c(11:17, 21:27, 31:33, 41:43, 50:53), | |
time.predictors.prior = c(1990:1998), | |
time.optimize.ssr = c(1990:1998), | |
time.plot = c(1990:2009) | |
) | |
# Run synth | |
synth.out <- synth(dataprep.out) | |
# Get result tables | |
print(synth.tables <- synth.tab( | |
dataprep.res = dataprep.out, | |
synth.res = synth.out) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment