Skip to content

Instantly share code, notes, and snippets.

@JimGrange
Created November 10, 2023 14:57
Show Gist options
  • Save JimGrange/f347843bf990a9b97b05869af4e5fc23 to your computer and use it in GitHub Desktop.
Save JimGrange/f347843bf990a9b97b05869af4e5fc23 to your computer and use it in GitHub Desktop.
linear modelling vs. aggregate analysis
library(tidyverse)
library(faux)
library(lme4)
library(afex)
options(dplyr.summarise.inform = FALSE)
set.seed(123)
sim_data <- function(n_subj, n_trials){
# set all data-generating parameters
beta_0 <- 800 # intercept; i.e., the grand mean
beta_1 <- 50 # slope; i.e, effect of category
omega_0 <- 80 # by-item random intercept sd
tau_0 <- 100 # by-subject random intercept sd
tau_1 <- 40 # by-subject random slope sd
rho <- .2 # correlation between intercept and slope
sigma <- 200 # residual (error) sd
n_subj <- n_subj
n_trials <- n_trials
items <- tibble(
trial = seq_len(n_trials),
condition = rep(c("A", "B"), c(n_trials / 2, n_trials / 2)),
X_i = rep(c(-0.5, 0.5), c(n_trials / 2, n_trials / 2)),
O_0i = rnorm(n = n_trials, mean = 0, sd = omega_0)
)
# simulate a sample of subjects
# sample from a multivariate random distribution
subjects <- faux::rnorm_multi(
n = n_subj,
mu = 0, # means for random effects are always 0
sd = c(tau_0, tau_1), # set SDs
r = rho, # set correlation, see ?rnorm_multi
varnames = c("T_0s", "T_1s")
) |>
round(2)
# add subject IDs
subjects$subj_id <- seq_len(n_subj)
# cross subject and item IDs; add an error term
# nrow(.) is the number of rows in the table
trials <- crossing(subjects, items) %>%
mutate(e_si = rnorm(nrow(.), mean = 0, sd = sigma))
dat_sim <- trials %>%
mutate(RT = beta_0 + T_0s + (beta_1 + T_1s) * X_i + e_si) %>%
select(subj_id, trial, condition, X_i, RT)
dat_sim <- dat_sim |>
select(id = subj_id, trial, condition, condition_coded = X_i, rt = RT)
# model & check significance of aggregate data
agg_data <- dat_sim |>
group_by(id, condition_coded) |>
summarise(mean_rt = mean(rt))
agg_data_model <- lm(mean_rt ~ 1 + condition_coded, data = agg_data)
p_value_agg <- as.numeric(summary(agg_data_model)$coefficients[, "Pr(>|t|)"][2])
# model & check significance of trial-level data
trial_data_model <- lmer(rt ~ 1 + condition_coded + (1 + condition|id),
data = dat_sim)
p_value_trial <- as.numeric(summary(trial_data_model)$coefficients[, "Pr(>|t|)"][2])
return(round(c(p_value_agg, p_value_trial), 3))
}
n_sims <- 100
sims <- 1:n_sims
n_subj <- c(10, 20, 30, 50, 100)
n_trials <- c(100, 250, 1000)
sim_results <- tibble(crossing(n_subj, n_trials, sims)) |>
mutate(p_agg = 0, p_lmm = 0)
for(i in 1:nrow(sim_results)){
print(i)
sim <- sim_data(sim_results$n_subj[i],
sim_results$n_trials[i])
sim_results$p_agg[i] <- sim[1]
sim_results$p_lmm[i] <- sim[2]
}
pd = position_dodge(3)
sim_results |>
group_by(n_subj, n_trials) |>
summarise(power_agg = (sum(p_agg < 0.05) / n_sims),
power_lmm = (sum(p_lmm < 0.05) / n_sims)) |>
mutate(n_trials = as.factor(n_trials)) |>
pivot_longer(power_agg:power_lmm) |>
ggplot(aes(x = n_subj, y = value, group = n_trials)) +
geom_hline(yintercept = 0.8,
colour = "grey",
linewidth = 1.2) +
geom_line(aes(colour = n_trials),
position = pd,
linewidth = 1) +
geom_point(aes(colour = n_trials),
position = pd,
size = 2.5) +
scale_x_continuous(breaks = c(10, 20, 30, 50, 100)) +
facet_wrap(~name) +
labs(x = "Number of Subjects",
y = "Power") +
theme(text = element_text(size = 16))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment