library(tidyverse)
# Make yard size, the presence/absence of a home garden, and attitudes toward the
# environment all correlated with each other. Home garden will be binary, so I split
# it based on some threshold later. Attitudes toward the environment will be on a
# scale of 1-10, so I rescale and ceiling it later
mu <- c(yard_size = 20000, home_garden = 30, attitude_env = 70)
stddev <- c(yard_size = 10000, home_garden = 15, attitude_env = 40)
cor_matrix <- matrix(c(1.0, 0.7, 0.1,
0.7, 1.0, 0.9,
0.1, 0.9, 1.0),
ncol = 3)
cov_matrix <- stddev %*% t(stddev) * cor_matrix
set.seed(1234)
confounders <- MASS::mvrnorm(n = 2000, mu = mu, Sigma = cov_matrix,
empirical = TRUE) %>%
as_tibble() %>%
# Adjust generated variables
mutate(home_garden = home_garden > 35) %>% # THIS IS CAUSING THE WEIRD/NEAT SPLIT
mutate(attitude_env = scales::rescale(attitude_env, to = c(1, 10)),
attitude_env = round(attitude_env, 0),
yard_size = round(abs(yard_size), 0)) %>%
# Make new temperature variable
mutate(temperature = round(rnorm(2000, mean = 70, sd = 5), 1)) %>%
# Generate probability of self-selecting into the rain barrel program
mutate(prob_barrel = (0.5 * attitude_env) + (4 * home_garden) +
(0.05 * yard_size / 1000) + (0.3 * temperature),
# Force probability to 0-1 scale
prob_barrel = scales::rescale(prob_barrel, to = c(0.05, 0.85)))
head(confounders)
#> # A tibble: 6 x 5
#> yard_size home_garden attitude_env temperature prob_barrel
#> <dbl> <lgl> <dbl> <dbl> <dbl>
#> 1 7100 FALSE 4 70.1 0.283
#> 2 17367 FALSE 6 69 0.341
#> 3 19499 TRUE 7 64.8 0.507
#> 4 33378 FALSE 1 70.1 0.274
#> 5 6011 FALSE 5 68.5 0.281
#> 6 12578 FALSE 5 70.8 0.332
ggplot(confounders, aes(x = temperature, y = prob_barrel)) +
geom_point() +
geom_smooth(method = "lm")
#> `geom_smooth()` using formula 'y ~ x'
Created on 2020-02-25 by the reprex package (v0.3.0)