jschoeley · December 12, 2020 21:08
diff --git a/gistfile1.txt b/gistfile1.txt
 # Disaggregation of 2D histogram of Poisson-counts with irregular bins
 # based upon the penalized composite link mixed model
 #
 # Jonas Schöley, based on code from
 # Deirdre I.R. Douma: The Penalized Composite Link Mixed Model.
 # see further references in code

 # Given is a matrix of observed counts over 2 dimensions (here
 # year and age, i.e. a so-called Lexis surface).
 # Each entry in the matrix corresponds to a rectangular region on the
 # 2D surface. The regions do not overlap, but may be of irregular
 # dimensions. The regions completely fill the rectangle given by the
 # range of the age and year coordinates.
 # 
 # It is known that the counts have been aggregated into the regions
 # from counts observed on a smaller grid. The goal is to estimate those
 # original counts on a smaller grid. This is achieved by the penalized
 # composite link (mixed) model. Part of this model is a matrix <C> which
 # defines the aggregation pattern, i.e. the association between regions on
 # the original, fine grid and regions on the aggregation grid.

 # Init ------------------------------------------------------------

 set.seed(1987)

 library(Matrix)

 cnst <- list(
  # number of year-cells on the fine grid
  years = 120,
  # number of age-cells on the fine grid
  ages = 100,
  # two different age groupings
  age_grouping_a =
    rep(1:5, c(20, 10, 10, 10, 50)),
  age_grouping_b =
    rep(1:4, c(20, 20, 10, 50))
 )

 # Construct composition matrix ------------------------------------

 # number of cells in ungrouped lexis grid 
 J = cnst$years*cnst$ages
 # Lexis grid cells are sequentially enumerated in column-major order
 lexis_grid_ungrouped <-
  matrix(1:J, nrow = cnst$ages, ncol = cnst$years)

 # superimpose the aggregation pattern on top of the ungrouped lexis grid
 lexis_grid_aggregation_pattern <- matrix(
  c(rep(cnst$age_grouping_a, 100), rep(cnst$age_grouping_b, 20)),
  nrow = cnst$ages, ncol = cnst$years
 )
 # plot the aggregation pattern
 image(t(lexis_grid_aggregation_pattern))

 # give a unique label to each aggregation region on the ungrouped lexis grid
 lexis_grid_aggregation_key <- lexis_grid_aggregation_pattern
 for (j in 2:cnst$years) {
  lexis_grid_aggregation_key[,j] <-
    lexis_grid_aggregation_key[cnst$ages,j-1] +
    lexis_grid_aggregation_key[,j]
 }
 # number of cells in grouped lexis surface
 I = max(lexis_grid_aggregation_key)

 # composition matrix, associates ungrouped Lexis grid cells (columns)
 # with Lexis aggregation regions (rows)
 C = matrix(0, nrow = I, ncol = J)
 for (i in 1:I) {
  C[i,] <- c(lexis_grid_aggregation_key)==i
 }

 # Simulate aggregated data ----------------------------------------

 # simulate ungrouped (true) counts from Poisson distribution with
 # linearly increasing trend with age
 lexis_grid_ungrouped_counts <-
  matrix(rpois(J, 1:cnst$ages), nrow = cnst$ages, ncol = cnst$years)
 # aggregate ungrouped counts into vector according to the aggregation
 # pattern defined in the composition matrix
 # the indices of the vector correspond to the labels of the
 # aggregation regions
 y = C%*%c(lexis_grid_ungrouped_counts)
 #sum(y) == sum(lexis_grid_ungrouped_counts)

 # plot ungrouped counts
 image(t(lexis_grid_ungrouped_counts))

 # Construct B-spline basis over ungrouped Lexis grid --------------
 # All code in this section copied and slightly changed by Jonas Schöley from
 # Deirdre I.R. Douma: The Penalized Composite Link Mixed Model. Master Thesis.
 # https://www.universiteitleiden.nl/binaries/content/assets/science/mi/scripties/statscience/douma-d-master-thesiss1577921.pdf

 rowwisekr <- function (X1, X2) {
  one.1 <- matrix(1, 1, ncol(X1))
  one.2 <- matrix(1, 1, ncol(X2))
  kronecker(X1, one.2) * kronecker(one.1, X2)
 }

 # 2 column matrix giving the year and age coordinates for the centroids
 # of the cells on the fine Lexis grid. Row numbers correspond to cell_id.
 coord <- cbind(
  rep(1:cnst$years, each = cnst$ages),
  rep(1:cnst$ages, cnst$years)
 )
 k = 15; deg = 3; ord_D = 2

 # b-spline basis for year coordinates
 coord1.min <- min(coord[, 1])
 coord1.max <- max(coord[, 1])
 d1 <- (coord1.max - coord1.min)/(k - deg)
 knots1 <- seq(coord1.min - deg * d1, coord1.max + deg * d1, by = d1)
 B1 <- splines::splineDesign(
  x = coord[, 1], knots = knots1, ord = deg + 1,
  outer.ok = TRUE, sparse = TRUE
 )

 # b-spline basis for age coordinates
 coord2.min <- min(coord[, 2])
 coord2.max <- max(coord[, 2])
 d2 <- (coord2.max - coord2.min)/(k - deg)
 knots2 <- seq(coord2.min - deg * d2, coord2.max + deg * d2, by = d2)
 B2 <- splines::splineDesign(
  x = coord[, 2], knots = knots2, ord = deg + 1,
  outer.ok = TRUE, sparse = TRUE
 )

 # difference matrix
 D <- diff(diag(k), diff = ord_D)

 # reparametrisation - Mixed model decomposition
 # SVD of P and constructing X and Z for two dimensions
 # This is really clever as the mixed model formulation gets rid of
 # the need to do an expensive grid search for the optimal lambda
 # smoothing parameter. Instead one assumes the parameters of the
 # b-spline to originate from a multivariate normal distribution, thereby
 # performing the regularization.
 # - Jonas Schöley
 P <- t(D)%*%D #kxk
 eigen.P <- eigen(P)
 U <- eigen.P$vectors[, 1:(k - ord_D)]
 Sigma <- eigen.P$values[1:(k - ord_D)]

 L <- U %*% diag(sqrt(Sigma))
 Z1 <- B1 %*% L %*% solve(t(L) %*% L)
 Z2 <- B2 %*% L %*% solve(t(L) %*% L)
 X_mat <- cbind(rep(1, k), 1:k) #kxp
 X1 <- B1 %*% X_mat #Jxp
 X2 <- B2 %*% X_mat #Jxp

 # Constructing X and Z with row-wise Kronecker product
 X <- rowwisekr(X2, X1)
 Z <- cbind(rowwisekr(Z2, X1), rowwisekr(X2, Z1), rowwisekr(Z2, Z1))

 # Fit PCLM --------------------------------------------------------
 # All code in this section copied and slightly changed from
 # Deirdre I.R. Douma: The Penalized Composite Link Mixed Model. Master Thesis.
 # https://www.universiteitleiden.nl/binaries/content/assets/science/mi/scripties/statscience/douma-d-master-thesiss1577921.pdf

 I <- nrow(y)
 J <- nrow(X)
 q <- ncol(Z)
 y <- as.matrix(y, ncol = 1)

 # Obtaining initial estimates
 C <- Matrix(C, sparse = TRUE)
 row_C <- rowSums(C) # cells contributing to aggregate count
 mean_row <- y / row_C # counts per contributing cell if contributions to aggregate are uniform
 y_long <- rep(mean_row, row_C) # estimated cell counts under uniform contributions to aggregate
 fit <- glm(floor(y_long) ~ 0 + as.matrix(X), family = 'poisson')

 # Setting up preliminaries for loop
 dif <- 10
 theta <- c(0)
 beta_old <- fit$coefficients #Vector of p
 u_old <- c(rep(0, q)) #Vector of q
 # I changed this to log-fitted values as seemed to be intended in the
 # original code. otherwise the exp in the next step does not make sense.
 # I also changed the glm step above to poisson, mainly due to the log-link
 # - Jonas Schöley
 eta_init <- log(fit$fitted.values) #Jx1
 gamm <- exp(eta_init) #Jx1
 Gamm <- .sparseDiagonal(n = J, x = as.vector(gamm)) #JxJ

 H <- numeric(1)
 s <- numeric(1)

 mu <- C %*% gamm #Ix1
 mu <- Matrix(mu, sparse = TRUE)
 W <- .sparseDiagonal(n = I, x = as.vector(mu)) #IxI
 Winv <- .sparseDiagonal(n = I, x = (1 / as.vector(mu))) #IxI

 X_breve <- Winv %*% C %*% Gamm %*% X #Ixp
 Z_breve <- Winv %*% C %*% Gamm %*% Z #Ixq
 z <- X_breve %*% beta_old + Z_breve %*% u_old + Winv %*% (y - mu) #Ix1

 G <- .sparseDiagonal(n = q, x = theta) #qxq

 ZGZ <- Z_breve %*% G %*% t(Z_breve) #IxI
 V <- Winv + ZGZ #IxI
 V <- Matrix(V, sparse = TRUE)
 V_inv <- solve(V) #IxI

 while(any(dif > 1e-6)){
  #Estimating Coefficients
  beta <- solve(t(X_breve) %*% V_inv %*% X_breve) %*% t(X_breve) %*% V_inv %*% z #p
  u <- G %*% t(Z_breve) %*% V_inv %*% (z - X_breve %*% beta_old) #q
  eta <- X %*% beta + Z %*% u #Jx1
  
  # Applying Newton-Raphson to calculate theta
  P <- V_inv - V_inv %*% X_breve %*% solve(t(X_breve) %*% V_inv %*% X_breve) %*%
    t(X_breve) %*% V_inv
  
  # Hessian matrix
  H <- (1/2) %*% sum(diag((t(Z_breve) %*% P %*% Z_breve %*% t(Z_breve)%*% P %*% Z_breve))) -
    t(z) %*% P %*% Z_breve %*% t(Z_breve) %*% P%*% Z_breve %*% t(Z_breve) %*% P %*% z

  s <- -(1/2) %*% sum(diag((t(Z_breve) %*% P %*% Z_breve))) +
    (1/2) %*%(t(z) %*% P %*% Z_breve %*% t(Z_breve) %*% P %*% z)
  
  theta_new <- as.numeric(theta - solve(H) * s)
  gamm <- as.numeric(exp(eta))
  Gamm <- .sparseDiagonal(n = J, x = as.vector(gamm)) #JxJ
  mu <- C %*% gamm #Ix1
  mu <- Matrix(mu, sparse = TRUE)
  W <- .sparseDiagonal(n = I, x = as.vector(mu)) #IxI
  Winv <- .sparseDiagonal(n = I, x = (1 / as.vector(mu))) #IxI  
  
  X_breve <- Winv %*% C %*% Gamm %*% X #Ixp
  Z_breve <- Winv %*% C %*% Gamm %*% Z #Ixq
  z <- X_breve %*% beta + Z_breve %*% u + Winv %*% (y - mu) #Ix1
  G <- .sparseDiagonal(n = q, x = theta_new) #qxq
  ZGZ <- Z_breve %*% G %*% t(Z_breve) #IxI  
  V <- Winv + ZGZ #IxI
  V <- Matrix(V, sparse = TRUE)  
  V_inv <- solve(V) #IxI
  
  # Updating Parameters
  dif <- abs(beta - beta_old)
  beta_old <- beta
  theta <- theta_new
 }  

 # Show ungrouped counts -------------------------------------------

 plot(mu-y)
 abs(sum(gamm) - sum(y))

 lexis_grid_ungrouped_counts_predicted <-
  matrix(gamm, nrow = cnst$ages, ncol = cnst$years)
  
 # true ungrouped (points) vs. predicted ungrouped (surface)
 rgl::persp3d(
  x = 1:cnst$ages, y = 1:cnst$years, lexis_grid_ungrouped_counts_predicted,
  color = 'grey'
 )
 rgl::points3d(
  x = coord[,2], y = coord[,1], z = c(lexis_grid_ungrouped_counts),
  col = 'black', alpha = 0.2
 )
	# Disaggregation of 2D histogram of Poisson-counts with irregular bins
	# based upon the penalized composite link mixed model
	#
	# Jonas Schöley, based on code from
	# Deirdre I.R. Douma: The Penalized Composite Link Mixed Model.
	# see further references in code

	# Given is a matrix of observed counts over 2 dimensions (here
	# year and age, i.e. a so-called Lexis surface).
	# Each entry in the matrix corresponds to a rectangular region on the
	# 2D surface. The regions do not overlap, but may be of irregular
	# dimensions. The regions completely fill the rectangle given by the
	# range of the age and year coordinates.
	#
	# It is known that the counts have been aggregated into the regions
	# from counts observed on a smaller grid. The goal is to estimate those
	# original counts on a smaller grid. This is achieved by the penalized
	# composite link (mixed) model. Part of this model is a matrix <C> which
	# defines the aggregation pattern, i.e. the association between regions on
	# the original, fine grid and regions on the aggregation grid.

	# Init ------------------------------------------------------------

	set.seed(1987)

	library(Matrix)

	cnst <- list(
	# number of year-cells on the fine grid
	years = 120,
	# number of age-cells on the fine grid
	ages = 100,
	# two different age groupings
	age_grouping_a =
	rep(1:5, c(20, 10, 10, 10, 50)),
	age_grouping_b =
	rep(1:4, c(20, 20, 10, 50))
	)

	# Construct composition matrix ------------------------------------

	# number of cells in ungrouped lexis grid
	J = cnst$years*cnst$ages
	# Lexis grid cells are sequentially enumerated in column-major order
	lexis_grid_ungrouped <-
	matrix(1:J, nrow = cnst$ages, ncol = cnst$years)

	# superimpose the aggregation pattern on top of the ungrouped lexis grid
	lexis_grid_aggregation_pattern <- matrix(
	c(rep(cnst$age_grouping_a, 100), rep(cnst$age_grouping_b, 20)),
	nrow = cnst$ages, ncol = cnst$years
	)
	# plot the aggregation pattern
	image(t(lexis_grid_aggregation_pattern))

	# give a unique label to each aggregation region on the ungrouped lexis grid
	lexis_grid_aggregation_key <- lexis_grid_aggregation_pattern
	for (j in 2:cnst$years) {
	lexis_grid_aggregation_key[,j] <-
	lexis_grid_aggregation_key[cnst$ages,j-1] +
	lexis_grid_aggregation_key[,j]
	}
	# number of cells in grouped lexis surface
	I = max(lexis_grid_aggregation_key)

	# composition matrix, associates ungrouped Lexis grid cells (columns)
	# with Lexis aggregation regions (rows)
	C = matrix(0, nrow = I, ncol = J)
	for (i in 1:I) {
	C[i,] <- c(lexis_grid_aggregation_key)==i
	}

	# Simulate aggregated data ----------------------------------------

	# simulate ungrouped (true) counts from Poisson distribution with
	# linearly increasing trend with age
	lexis_grid_ungrouped_counts <-
	matrix(rpois(J, 1:cnst$ages), nrow = cnst$ages, ncol = cnst$years)
	# aggregate ungrouped counts into vector according to the aggregation
	# pattern defined in the composition matrix
	# the indices of the vector correspond to the labels of the
	# aggregation regions
	y = C%*%c(lexis_grid_ungrouped_counts)
	#sum(y) == sum(lexis_grid_ungrouped_counts)

	# plot ungrouped counts
	image(t(lexis_grid_ungrouped_counts))

	# Construct B-spline basis over ungrouped Lexis grid --------------
	# All code in this section copied and slightly changed by Jonas Schöley from
	# Deirdre I.R. Douma: The Penalized Composite Link Mixed Model. Master Thesis.
	# https://www.universiteitleiden.nl/binaries/content/assets/science/mi/scripties/statscience/douma-d-master-thesiss1577921.pdf

	rowwisekr <- function (X1, X2) {
	one.1 <- matrix(1, 1, ncol(X1))
	one.2 <- matrix(1, 1, ncol(X2))
	kronecker(X1, one.2) * kronecker(one.1, X2)
	}

	# 2 column matrix giving the year and age coordinates for the centroids
	# of the cells on the fine Lexis grid. Row numbers correspond to cell_id.
	coord <- cbind(
	rep(1:cnst$years, each = cnst$ages),
	rep(1:cnst$ages, cnst$years)
	)
	k = 15; deg = 3; ord_D = 2

	# b-spline basis for year coordinates
	coord1.min <- min(coord[, 1])
	coord1.max <- max(coord[, 1])
	d1 <- (coord1.max - coord1.min)/(k - deg)
	knots1 <- seq(coord1.min - deg * d1, coord1.max + deg * d1, by = d1)
	B1 <- splines::splineDesign(
	x = coord[, 1], knots = knots1, ord = deg + 1,
	outer.ok = TRUE, sparse = TRUE
	)

	# b-spline basis for age coordinates
	coord2.min <- min(coord[, 2])
	coord2.max <- max(coord[, 2])
	d2 <- (coord2.max - coord2.min)/(k - deg)
	knots2 <- seq(coord2.min - deg * d2, coord2.max + deg * d2, by = d2)
	B2 <- splines::splineDesign(
	x = coord[, 2], knots = knots2, ord = deg + 1,
	outer.ok = TRUE, sparse = TRUE
	)

	# difference matrix
	D <- diff(diag(k), diff = ord_D)

	# reparametrisation - Mixed model decomposition
	# SVD of P and constructing X and Z for two dimensions
	# This is really clever as the mixed model formulation gets rid of
	# the need to do an expensive grid search for the optimal lambda
	# smoothing parameter. Instead one assumes the parameters of the
	# b-spline to originate from a multivariate normal distribution, thereby
	# performing the regularization.
	# - Jonas Schöley
	P <- t(D)%*%D #kxk
	eigen.P <- eigen(P)
	U <- eigen.P$vectors[, 1:(k - ord_D)]
	Sigma <- eigen.P$values[1:(k - ord_D)]

	L <- U %*% diag(sqrt(Sigma))
	Z1 <- B1 %% L %% solve(t(L) %*% L)
	Z2 <- B2 %% L %% solve(t(L) %*% L)
	X_mat <- cbind(rep(1, k), 1:k) #kxp
	X1 <- B1 %*% X_mat #Jxp
	X2 <- B2 %*% X_mat #Jxp

	# Constructing X and Z with row-wise Kronecker product
	X <- rowwisekr(X2, X1)
	Z <- cbind(rowwisekr(Z2, X1), rowwisekr(X2, Z1), rowwisekr(Z2, Z1))

	# Fit PCLM --------------------------------------------------------
	# All code in this section copied and slightly changed from
	# Deirdre I.R. Douma: The Penalized Composite Link Mixed Model. Master Thesis.
	# https://www.universiteitleiden.nl/binaries/content/assets/science/mi/scripties/statscience/douma-d-master-thesiss1577921.pdf

	I <- nrow(y)
	J <- nrow(X)
	q <- ncol(Z)
	y <- as.matrix(y, ncol = 1)

	# Obtaining initial estimates
	C <- Matrix(C, sparse = TRUE)
	row_C <- rowSums(C) # cells contributing to aggregate count
	mean_row <- y / row_C # counts per contributing cell if contributions to aggregate are uniform
	y_long <- rep(mean_row, row_C) # estimated cell counts under uniform contributions to aggregate
	fit <- glm(floor(y_long) ~ 0 + as.matrix(X), family = 'poisson')

	# Setting up preliminaries for loop
	dif <- 10
	theta <- c(0)
	beta_old <- fit$coefficients #Vector of p
	u_old <- c(rep(0, q)) #Vector of q
	# I changed this to log-fitted values as seemed to be intended in the
	# original code. otherwise the exp in the next step does not make sense.
	# I also changed the glm step above to poisson, mainly due to the log-link
	# - Jonas Schöley
	eta_init <- log(fit$fitted.values) #Jx1
	gamm <- exp(eta_init) #Jx1
	Gamm <- .sparseDiagonal(n = J, x = as.vector(gamm)) #JxJ

	H <- numeric(1)
	s <- numeric(1)

	mu <- C %*% gamm #Ix1
	mu <- Matrix(mu, sparse = TRUE)
	W <- .sparseDiagonal(n = I, x = as.vector(mu)) #IxI
	Winv <- .sparseDiagonal(n = I, x = (1 / as.vector(mu))) #IxI

	X_breve <- Winv %% C %% Gamm %*% X #Ixp
	Z_breve <- Winv %% C %% Gamm %*% Z #Ixq
	z <- X_breve %% beta_old + Z_breve %% u_old + Winv %*% (y - mu) #Ix1

	G <- .sparseDiagonal(n = q, x = theta) #qxq

	ZGZ <- Z_breve %% G %% t(Z_breve) #IxI
	V <- Winv + ZGZ #IxI
	V <- Matrix(V, sparse = TRUE)
	V_inv <- solve(V) #IxI

	while(any(dif > 1e-6)){
	#Estimating Coefficients
	beta <- solve(t(X_breve) %% V_inv %% X_breve) %% t(X_breve) %% V_inv %*% z #p
	u <- G %% t(Z_breve) %% V_inv %% (z - X_breve %% beta_old) #q
	eta <- X %% beta + Z %% u #Jx1

	# Applying Newton-Raphson to calculate theta
	P <- V_inv - V_inv %% X_breve %% solve(t(X_breve) %% V_inv %% X_breve) %*%
	t(X_breve) %*% V_inv

	# Hessian matrix
	H <- (1/2) %% sum(diag((t(Z_breve) %% P %% Z_breve %% t(Z_breve)%% P %% Z_breve))) -
	t(z) %% P %% Z_breve %% t(Z_breve) %% P%% Z_breve %% t(Z_breve) %% P %% z

	s <- -(1/2) %% sum(diag((t(Z_breve) %% P %*% Z_breve))) +
	(1/2) %%(t(z) %% P %% Z_breve %% t(Z_breve) %% P %% z)

	theta_new <- as.numeric(theta - solve(H) * s)
	gamm <- as.numeric(exp(eta))
	Gamm <- .sparseDiagonal(n = J, x = as.vector(gamm)) #JxJ
	mu <- C %*% gamm #Ix1
	mu <- Matrix(mu, sparse = TRUE)
	W <- .sparseDiagonal(n = I, x = as.vector(mu)) #IxI
	Winv <- .sparseDiagonal(n = I, x = (1 / as.vector(mu))) #IxI

	X_breve <- Winv %% C %% Gamm %*% X #Ixp
	Z_breve <- Winv %% C %% Gamm %*% Z #Ixq
	z <- X_breve %% beta + Z_breve %% u + Winv %*% (y - mu) #Ix1
	G <- .sparseDiagonal(n = q, x = theta_new) #qxq
	ZGZ <- Z_breve %% G %% t(Z_breve) #IxI
	V <- Winv + ZGZ #IxI
	V <- Matrix(V, sparse = TRUE)
	V_inv <- solve(V) #IxI

	# Updating Parameters
	dif <- abs(beta - beta_old)
	beta_old <- beta
	theta <- theta_new
	}

	# Show ungrouped counts -------------------------------------------

	plot(mu-y)
	abs(sum(gamm) - sum(y))

	lexis_grid_ungrouped_counts_predicted <-
	matrix(gamm, nrow = cnst$ages, ncol = cnst$years)

	# true ungrouped (points) vs. predicted ungrouped (surface)
	rgl::persp3d(
	x = 1:cnst$ages, y = 1:cnst$years, lexis_grid_ungrouped_counts_predicted,
	color = 'grey'
	)
	rgl::points3d(
	x = coord[,2], y = coord[,1], z = c(lexis_grid_ungrouped_counts),
	col = 'black', alpha = 0.2
	)