Last active
February 12, 2021 19:01
-
-
Save lcolladotor/da5ec764b522789cdee9da79f231a40a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Related to https://docs.google.com/document/d/1uDdq0W9eAEnyPLf_gKJRKI8TGpujXIhGhaHBuLdmmRE/edit?usp=sharing | |
## and Berto et al 2019 https://www.pnas.org/content/116/48/24334 | |
library("SummarizedExperiment") | |
## The two data sets we'll use | |
datasets <- c("NeuN", "OLIG2") | |
## Read in data for both datasets and build a SummarizedExperiment object | |
rse_list <- lapply(datasets, function(dataid) { | |
## Read in the data | |
counts <- read.table(paste0(dataid, "_Primates_AdjExp.txt")) | |
pheno <- read.table(paste0(dataid, "_Primates_pheno.txt")) | |
## Make a table for the gene information we have | |
gene_info <- DataFrame(symbol = rownames(counts)) | |
## Build a RangedSummarizedExperiment object | |
SummarizedExperiment( | |
assays = list(counts = counts), | |
rowData = gene_info, | |
colData = pheno | |
) | |
}) | |
names(rse_list) <- datasets | |
## Can't run due to different number of genes | |
do.call(cbind, rse_list) | |
## Find the unique genes across both datasets | |
genes <- unique(unlist(lapply(rse_list, rownames))) | |
genes | |
# [1] 9037 | |
## Make it such that we have the same number of genes in each dataset | |
rse_uniform_list <- lapply(rse_list, function(rse) { | |
## Make an empty large matrix | |
new_mat <- matrix(0, nrow = length(genes), ncol = ncol(rse)) | |
## Add the gene and sample names | |
rownames(new_mat) <- genes | |
colnames(new_mat) <- colnames(rse) | |
## Find the position for our current genes among the merged set of genes | |
m <- match(rownames(rse), genes) | |
## Replace the 0s in the matrix with the actual counts/data we have | |
new_mat[m, ] <- as.matrix(assays(rse)$counts) | |
## Make a new SummarizedExperiment object | |
SummarizedExperiment( | |
assays = list(counts = new_mat), | |
rowData = DataFrame(symbol = genes), | |
colData = colData(rse) | |
) | |
}) | |
## We can now combine them into one | |
rse <- do.call(cbind, rse_uniform_list) | |
## Add the "dataset" where each sample comes from | |
rse$Dataset <- rep(datasets, sapply(rse_uniform_list, ncol)) | |
## HumAge is really a categorical variable, so let's treat it as such | |
rse$HumAge <- as.factor(rse$HumAge) | |
## Now we can use iSEE to interactively explore the merged data | |
library("iSEE") | |
iSEE(rse) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment