Skip to content

Instantly share code, notes, and snippets.

@vankesteren
Created August 22, 2024 15:05
Show Gist options
  • Save vankesteren/779de92cfd2989fac47d7e529d8fc02e to your computer and use it in GitHub Desktop.
Save vankesteren/779de92cfd2989fac47d7e529d8fc02e to your computer and use it in GitHub Desktop.
SCM factor structure (WIP)
# panel data factor model simulation
# See hollingsworth & wing (2022) tactics for design..
# paragraph 2.3, assumption 2
n_timepoints <- 100
n_unobserved <- 1
n_units <- 200
n_covar <- 5
ru <- 0.6
mean_ly <- 1
sd_ly <- 0.1
sd_ey <- 0.4
mean_lx <- 1
sd_lx <- 0.1
sd_ex <- 0.4
rarnorm <- function(n, mean = 0, sd = 1, phi = 0) {
# argument checks
stopifnot(abs(phi) < 1)
if (phi == 0) return(rnorm(n, mean, sd))
# create vector, pick first value, and fill remaining ones
x <- numeric(n)
x[1] <- rnorm(1, mean, sd)
for (i in 2:n)
x[i] <- rnorm(1, mean + phi*(x[i-1]-mean), sqrt(sd^2 - sd^2 * phi^2))
return(x)
}
# Unobserved common time trend
U <- sapply(1:n_unobserved, \(x) rarnorm(n_timepoints, sd = 1, phi = ru))
# outcome is a function of common time trend
# the loadings
LY <- matrix(rnorm(n_units*n_unobserved, mean = mean_ly, sd = sd_ly), n_unobserved)
# the residuals, vary over time and units
# transitory shock
EY <- matrix(rnorm(n_timepoints*n_units, sd = sd_ey), n_timepoints)
Y <- U%*%LY + EY
# covariates are also a function of common time trend plus error
LX <- array(rnorm(n_unobserved * n_units * n_covar, mean = mean_lx, sd = sd_lx), dim = c(n_unobserved, n_units, n_covar))
EX <- array(rnorm(n_timepoints * n_units * n_covar, sd = sd_ex), dim = c(n_timepoints, n_units, n_covar))
X <- array(dim = c(n_timepoints, n_units, n_covar))
for (j in 1:n_covar) {
X[,,j] <- U %*% LX[,,j]
}
X <- X + EX
plot(X[,1,1],type = "l")
lines(Y[,1],type = "l", lty = 2)
cor(X[,1,1], Y[,1])
# the loadings, vary over units but not over time
L <- matrix(rnorm(n_units*n_factors, sd = 1), n_factors)
scores <- array(dim = c(n_timepoints, n_units, n_factors))
for (t in 1:n_timepoints) {
for (i in 1:n_units) {
scores[t,i,] <- A[t,] * L[,i]
}
}
Y <- A%*%L + E
Y1 <- Y[13:18, 1, drop = FALSE] + 1.5
Y0 <- Y[13:18, -1]
Z1 <- Y[1:12, 1, drop = FALSE]
Z0 <- Y[1:12, -1]
fit <- pensynth::pensynth(X1 = Y[1:12,1,drop=FALSE], X0 = Y[1:12,-1], lambda = 1e-2)
Y0 <- predict(fit, Y[13:18, -1])
Ys <- scale(Y[1:12,], scale = FALSE)
Yst <- t(scale(t(Ys), scale = FALSE))
# fit <- glmnet::cv.glmnet(x = Y[1:12,-1], y = Y[1:12,1])
# Y0 <- predict(fit, newx = Y[13:18, -1])
hist(ATT, breaks = "FD")
mean(ATT)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment