Last active
February 12, 2020 10:58
-
-
Save m-Py/b040c25a7074243c8f0ca93ea16854e5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## This document illustrates that type 1 sum of squares lead to increased alpha | |
## error rates when a predictive covariate is included in the regression model. | |
# Estimate p-value for treatment (null) effect via linear regression, | |
# including a covariate that is predictive of the outcome | |
# | |
# param N: sample size, default 100 | |
# param beta_covariate: regression weight for a covariate on an outcome, default 0.5 | |
# param coding: how is the treatment coded, defaults to 0/1 | |
# param AOV: sum of squares method; if `FALSE`, the p-value is just estimated via `summary()` | |
# (= default); if 1, the p-value is estimated via `anova()`; if 2 or 3, the p-value | |
# is estimated using type 2 or type 3 sum of squares using `car::Anova()` | |
# return: the p-value associated with the "treatment" | |
# | |
# Details: | |
# Data is generated via a regression model, predicting the outcome from a covariate | |
# `outcome = beta_covariate * rnorm(N) + error` | |
# [`+ treatment * 0`, i.e. a null effect of treatment, is implicit] | |
# | |
covariate_regression <- function( | |
N = 100, | |
beta_covariate = 0.5, | |
coding = c(0, 1), | |
AOV = FALSE | |
) { | |
# Simulate covariate and outcome data | |
data <- data.frame(covariate = rnorm(N)) | |
error <- rnorm(N) | |
data$outcome <- beta_covariate * data$covariate + error | |
# Insert treatment variable that has no effect | |
data$treatment <- sample(rep_len(coding, N)) | |
# do regression to test for treatment effect | |
model <- lm(outcome ~ treatment * covariate, data = data) | |
get_p_value(model, "treatment", AOV) | |
} | |
get_p_value <- function(model, effect, AOV = 0) { | |
stopifnot(AOV %in% 0:3) | |
if (AOV == 0) { | |
tab <- summary(model)$coefficients | |
# extract p-value | |
return(tab[rownames(tab) == effect, "Pr(>|t|)"]) | |
} else if (AOV == 1) { | |
tab <- anova(model) | |
return(tab[rownames(tab) == effect, "Pr(>F)"]) | |
} | |
tab <- car::Anova(model, type = AOV) | |
tab[rownames(tab) == effect, "Pr(>F)"] | |
} | |
# Standard regression, using summary to print p-value: | |
mean(replicate(10000, covariate_regression()) <= .05) | |
#> 0.0513 | |
# Use `anova()` to print p-value: | |
mean(replicate(10000, covariate_regression(AOV = 1)) <= .05) | |
#> 0.0828 | |
# Coding scheme does not seem to matter: | |
mean(replicate(10000, covariate_regression(AOV = 1, coding = -1:1)) <= .05) | |
#> 0.0793 | |
# No problem if covariate is not predictive of outcome: | |
mean(replicate(10000, covariate_regression(AOV = 1, beta_covariate = 0)) <= .05) | |
#> 0.05 | |
# No problem for type 2 or type 3 sum of squares | |
mean(replicate(10000, covariate_regression(AOV = 2)) <= .05) | |
#> 0.0484 | |
mean(replicate(10000, covariate_regression(AOV = 3)) <= .05) | |
#> 0.0531 | |
### Problem disappears when the covariate comes first in the regression model! | |
# lm(outcome ~ covariate * treatment, data = data) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment