Skip to content

Instantly share code, notes, and snippets.

@shackett
Last active September 21, 2020 20:42
Show Gist options
  • Save shackett/e4a1a9b930623580282cd64d33a802c4 to your computer and use it in GitHub Desktop.
Save shackett/e4a1a9b930623580282cd64d33a802c4 to your computer and use it in GitHub Desktop.
Simulate a dataset of genomic timecourses
library(dplyr)
library(tidyr)
library(impulse) # from github/shackett
timepts <- c(0, 5, 10, 20, 30, 40, 60, 90) # time points measured
measurement_sd <- 0.5 # standard deviation of Gaussian noise added to each observation
total_measurements <- 10000 # total number of genes
signal_frac <- 0.2 # what fraction of genes contain real signal
set.seed(1234)
# simulate timecourses containing signal
alt_timecourses <- impulse::simulate_timecourses(n = total_measurements * signal_frac * 2,
timepts = timepts,
prior_pars = c(v_sd = 0.8,
rate_shape = 2,
rate_scale = 0.25,
time_shape = 1,
time_scale = 30),
measurement_sd = measurement_sd) %>%
unnest_legacy(measurements) %>%
select(-true_model) %>%
mutate(signal = "contains signal") %>%
# drop timecourses where no true value's magnitude is greater than 1 (these
# aren't really signal containing
# and timecourses where the initial value isn't ~zero
group_by(tc_id) %>%
filter(any(abs(sim_fit) > 1),
abs(sim_fit[time == 0]) < 0.1) %>%
ungroup()
# only retain the target number of signal containing timecourses
alt_timecourses <- alt_timecourses %>%
semi_join(
alt_timecourses %>%
distinct(tc_id) %>%
sample_n(min(n(), total_measurements * signal_frac)),
by = "tc_id")
null_timecourses <- crossing(tc_id = seq(max(alt_timecourses$tc_id) + 1,
max(alt_timecourses$tc_id) + total_measurements * (1-signal_frac)),
time = timepts) %>%
mutate(signal = "no signal",
sim_fit = 0,
abundance = rnorm(n(), 0, measurement_sd))
simulated_timecourses <- bind_rows(alt_timecourses, null_timecourses) %>%
mutate(signal = factor(signal, levels = c("contains signal", "no signal"))) %>%
group_by(tc_id) %>%
mutate(fold_change = abundance - abundance[time == 0]) %>%
ungroup()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment