vankesteren · August 22, 2024 15:05
diff --git a/scm_factor.R b/scm_factor.R
 # panel data factor model simulation
 # See hollingsworth & wing (2022) tactics for design..
 # paragraph 2.3, assumption 2
 n_timepoints <- 100
 n_unobserved <- 1
 n_units <- 200
 n_covar <- 5

 ru <- 0.6
 mean_ly <- 1
 sd_ly <- 0.1
 sd_ey <- 0.4
 mean_lx <- 1
 sd_lx <- 0.1
 sd_ex <- 0.4

 rarnorm <- function(n, mean = 0, sd = 1, phi = 0) {
  # argument checks
  stopifnot(abs(phi) < 1)
  if (phi == 0) return(rnorm(n, mean, sd))
  
  # create vector, pick first value, and fill remaining ones
  x <- numeric(n)
  x[1] <- rnorm(1, mean, sd)
  for (i in 2:n)
    x[i] <- rnorm(1, mean + phi*(x[i-1]-mean), sqrt(sd^2 - sd^2 * phi^2))
  return(x)
 }


 # Unobserved common time trend
 U <- sapply(1:n_unobserved, \(x) rarnorm(n_timepoints, sd = 1, phi = ru))


 # outcome is a function of common time trend
 # the loadings
 LY <- matrix(rnorm(n_units*n_unobserved, mean = mean_ly, sd = sd_ly), n_unobserved)

 # the residuals, vary over time and units
 # transitory shock
 EY <- matrix(rnorm(n_timepoints*n_units, sd = sd_ey), n_timepoints)


 Y <- U%*%LY + EY


 # covariates are also a function of common time trend plus error
 LX <- array(rnorm(n_unobserved * n_units * n_covar, mean = mean_lx, sd = sd_lx), dim = c(n_unobserved, n_units, n_covar))
 EX <- array(rnorm(n_timepoints * n_units * n_covar, sd = sd_ex), dim = c(n_timepoints, n_units, n_covar))

 X <- array(dim = c(n_timepoints, n_units, n_covar))
 for (j in 1:n_covar) {
  X[,,j] <- U %*% LX[,,j]
 }
 X <- X + EX


 plot(X[,1,1],type = "l")
 lines(Y[,1],type = "l", lty = 2)

 cor(X[,1,1], Y[,1])


 # the loadings, vary over units but not over time
 L <- matrix(rnorm(n_units*n_factors, sd = 1), n_factors)


 scores <- array(dim = c(n_timepoints, n_units, n_factors))
 for (t in 1:n_timepoints) {
  for (i in 1:n_units) {
    scores[t,i,] <- A[t,] * L[,i]
  }
 }


 Y <- A%*%L + E

 Y1 <- Y[13:18, 1, drop = FALSE] + 1.5
 Y0 <- Y[13:18, -1]
 Z1 <- Y[1:12, 1, drop = FALSE]
 Z0 <- Y[1:12, -1]




 fit <- pensynth::pensynth(X1 = Y[1:12,1,drop=FALSE], X0 = Y[1:12,-1], lambda = 1e-2)
 Y0 <- predict(fit, Y[13:18, -1])

 Ys <- scale(Y[1:12,], scale = FALSE)
 Yst <- t(scale(t(Ys), scale = FALSE))

 # fit <- glmnet::cv.glmnet(x = Y[1:12,-1], y = Y[1:12,1])
 # Y0 <- predict(fit, newx = Y[13:18, -1])

 hist(ATT, breaks = "FD")
 mean(ATT)
	# panel data factor model simulation
	# See hollingsworth & wing (2022) tactics for design..
	# paragraph 2.3, assumption 2
	n_timepoints <- 100
	n_unobserved <- 1
	n_units <- 200
	n_covar <- 5

	ru <- 0.6
	mean_ly <- 1
	sd_ly <- 0.1
	sd_ey <- 0.4
	mean_lx <- 1
	sd_lx <- 0.1
	sd_ex <- 0.4

	rarnorm <- function(n, mean = 0, sd = 1, phi = 0) {
	# argument checks
	stopifnot(abs(phi) < 1)
	if (phi == 0) return(rnorm(n, mean, sd))

	# create vector, pick first value, and fill remaining ones
	x <- numeric(n)
	x[1] <- rnorm(1, mean, sd)
	for (i in 2:n)
	x[i] <- rnorm(1, mean + phi(x[i-1]-mean), sqrt(sd^2 - sd^2 phi^2))
	return(x)
	}


	# Unobserved common time trend
	U <- sapply(1:n_unobserved, \(x) rarnorm(n_timepoints, sd = 1, phi = ru))


	# outcome is a function of common time trend
	# the loadings
	LY <- matrix(rnorm(n_units*n_unobserved, mean = mean_ly, sd = sd_ly), n_unobserved)

	# the residuals, vary over time and units
	# transitory shock
	EY <- matrix(rnorm(n_timepoints*n_units, sd = sd_ey), n_timepoints)


	Y <- U%*%LY + EY


	# covariates are also a function of common time trend plus error
	LX <- array(rnorm(n_unobserved * n_units * n_covar, mean = mean_lx, sd = sd_lx), dim = c(n_unobserved, n_units, n_covar))
	EX <- array(rnorm(n_timepoints * n_units * n_covar, sd = sd_ex), dim = c(n_timepoints, n_units, n_covar))

	X <- array(dim = c(n_timepoints, n_units, n_covar))
	for (j in 1:n_covar) {
	X[,,j] <- U %*% LX[,,j]
	}
	X <- X + EX


	plot(X[,1,1],type = "l")
	lines(Y[,1],type = "l", lty = 2)

	cor(X[,1,1], Y[,1])


	# the loadings, vary over units but not over time
	L <- matrix(rnorm(n_units*n_factors, sd = 1), n_factors)


	scores <- array(dim = c(n_timepoints, n_units, n_factors))
	for (t in 1:n_timepoints) {
	for (i in 1:n_units) {
	scores[t,i,] <- A[t,] * L[,i]
	}
	}


	Y <- A%*%L + E

	Y1 <- Y[13:18, 1, drop = FALSE] + 1.5
	Y0 <- Y[13:18, -1]
	Z1 <- Y[1:12, 1, drop = FALSE]
	Z0 <- Y[1:12, -1]




	fit <- pensynth::pensynth(X1 = Y[1:12,1,drop=FALSE], X0 = Y[1:12,-1], lambda = 1e-2)
	Y0 <- predict(fit, Y[13:18, -1])

	Ys <- scale(Y[1:12,], scale = FALSE)
	Yst <- t(scale(t(Ys), scale = FALSE))

	# fit <- glmnet::cv.glmnet(x = Y[1:12,-1], y = Y[1:12,1])
	# Y0 <- predict(fit, newx = Y[13:18, -1])

	hist(ATT, breaks = "FD")
	mean(ATT)