An example where dropping the "more important" predictor from a regression makes the "less important" predictor become statistically insignificant
# In this example, both x1 and x2 predict y and the two predictors are correlated
# with each other, but x2 has a much larger regression coefficient than x1. In a
# regression with both x1 and x2, both predictors are statistically significant.
# However, if we drop x2, then x1 becomes insignificant. This is because dropping
# x2 dramatically increases the residual standard error. This effect overwhelms the
# increase in the estimated coefficient of x1 that arises from its correlation with
# the omitted variable x2.
library(MASS)

set.seed(1234)
n <- 100

# Draw two predictors with unit variances and correlation 0.1
S <- matrix(c(1, 0.1,
              0.1, 1), 2, 2, byrow = TRUE)
X <- mvrnorm(n, mu = c(0, 0), S)
x1 <- X[,1]
x2 <- X[,2]

# Generate y: the coefficient on x2 is ten times that on x1
epsilon <- rnorm(n)
b0 <- 0
b1 <- 0.2
b2 <- 2
y <- b0 + b1 * x1 + b2 * x2 + epsilon
# Fit the full model and each single-predictor model
reg1 <- lm(y ~ x1 + x2)
reg2 <- lm(y ~ x1)
reg3 <- lm(y ~ x2)
summary(reg1)
summary(reg2)
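
# A minimal follow-up sketch: compare the residual standard error and the standard
# error on x1's coefficient across the two fits to see the mechanism directly.
# Dropping x2 pushes b2 * x2 into the residual, so the residual standard error
# jumps from roughly 1 (the true error SD) in reg1 to roughly sqrt(1 + b2^2) in
# reg2, since x1 and x2 are only weakly correlated.
sigma(reg1)  # residual SE with both predictors
sigma(reg2)  # residual SE with x1 only; much larger

# The inflated residual SE in reg2 blows up the standard error on x1, which is why
# x1 loses significance even though its point estimate rises (it picks up part of
# the effect of the omitted, positively correlated x2).
coef(summary(reg1))["x1", c("Estimate", "Std. Error", "Pr(>|t|)")]
coef(summary(reg2))["x1", c("Estimate", "Std. Error", "Pr(>|t|)")]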