Skip to content

Instantly share code, notes, and snippets.

@raivivek
Created August 27, 2024 01:39
Show Gist options
  • Save raivivek/16b26b586130845169e510049fcc6026 to your computer and use it in GitHub Desktop.
Save raivivek/16b26b586130845169e510049fcc6026 to your computer and use it in GitHub Desktop.
Share with Kehinde / raivivek / DeSeq2 analysis of raw bulk RNA-seq
[...]
run_differential <- function(counts_mat, metadata,
design = "Disease.Status + Age + Sex + BMI",
min_reads_per_sample = 5,
min_fraction_of_sample = .25) {
sample_ids <- colnames(counts_mat)
message(glue("info: counts matrix: {paste(dim(counts_mat), collapse = ', ')} \t Covariate matrix: {paste(dim(metadata), collapse = ', ')}"))
message("info: preparing colData..")
colData <- metadata %>%
mutate(Sample.ID = factor(Sample.ID, levels = sample_ids, ordered = T),
Sex = factor(Sex),
Age = scale(Age),
BMI = scale(BMI)) %>%
arrange(Sample.ID) %>%
column_to_rownames(var = "Sample.ID")
message(glue("info: filtering genes using {min_reads_per_sample} reads per sample in >={min_fraction_of_sample*100}% of samples.."))
counts_mat <- counts_mat[apply(counts_mat, 1, function(x) sum(x > min_reads_per_sample) >= length(x) * min_fraction_of_sample), ]
message(glue("info: left with {nrow(counts_mat)} genes.."))
if (any(rownames(colData) != colnames(counts_mat))) { print("names don't match. check or error!") }
message("info: creating dds object..")
dds <- DESeq2::DESeqDataSetFromMatrix(countData = counts_mat, colData = colData,
design = ~ !!treat_string_as_expr(design))
message("info: starting differential analysis..")
dds <- DESeq2::DESeq(dds)
rld <- DESeq2::varianceStabilizingTransformation(dds, blind=TRUE)
message("info: done.")
return(list(dds = dds, rld = rld))
}
# Start reading data
counts <- read_tsv(opts$counts_file) # counts
protein_coding_idx <- opts$gtf$type == "gene" & opts$gtf$gene_type == "protein_coding" & (!seqnames(opts$gtf) %in% c("chrX", "chrY", "chrM"))
select_idx <- counts$Geneid %in% opts$gtf[protein_coding_idx, ]$gene_id # only protein coding
counts_mat <- as.matrix(counts[select_idx, -c(1:2)])
rownames(counts_mat) <- opts$gtf[protein_coding_idx, ]$gene_id
metadata <- readRDS(opts$metadata) %>%
filter(!!treat_inputs_as_exprs(opts$samples_exclude_string))
# exclude QC'ed samples
counts_mat <- counts_mat[, colnames(counts_mat) %in% opts$metadata[[opts$samples_name_col]]]
resDe <- run_differential(
counts_mat,
metadata,
design = opts$design,
min_reads_per_sample = opts$min_reads_per_sample,
min_fraction_of_sample = opts$min_fraction_of_sample
)
result <- results(resDe$dds, contrast = c(str_split(opts$contrast, ",", simplify = T)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment