lcolladotor · February 12, 2021 19:01
diff --git a/explore_berto_et_al_2019.R b/explore_berto_et_al_2019.R
 ## Related to https://docs.google.com/document/d/1uDdq0W9eAEnyPLf_gKJRKI8TGpujXIhGhaHBuLdmmRE/edit?usp=sharing
 ## and Berto et al 2019 https://www.pnas.org/content/116/48/24334

 library("SummarizedExperiment")

 ## The two data sets we'll use
 datasets <- c("NeuN", "OLIG2")

 ## Read in data for both datasets and build a SummarizedExperiment object
 rse_list <- lapply(datasets, function(dataid) {
    ## Read in the data
    counts <- read.table(paste0(dataid, "_Primates_AdjExp.txt"))
    pheno <- read.table(paste0(dataid, "_Primates_pheno.txt"))
    
    ## Make a table for the gene information we have
    gene_info <- DataFrame(symbol = rownames(counts))
    
    ## Build a RangedSummarizedExperiment object
    SummarizedExperiment(
        assays = list(counts = counts),
        rowData = gene_info,
        colData = pheno
    )
 })
 names(rse_list) <- datasets

 ## Can't run due to different number of genes
 do.call(cbind, rse_list)

 ## Find the unique genes across both datasets
 genes <- unique(unlist(lapply(rse_list, rownames)))
 genes
 # [1] 9037

 ## Make it such that we have the same number of genes in each dataset
 rse_uniform_list <- lapply(rse_list, function(rse) {
    ## Make an empty large matrix
    new_mat <- matrix(0, nrow = length(genes), ncol = ncol(rse))
    
    ## Add the gene and sample names
    rownames(new_mat) <- genes
    colnames(new_mat) <- colnames(rse)
    
    ## Find the position for our current genes among the merged set of genes
    m <- match(rownames(rse), genes)
    
    ## Replace the 0s in the matrix with the actual counts/data we have
    new_mat[m, ] <- as.matrix(assays(rse)$counts)
    
    ## Make a new SummarizedExperiment object
    SummarizedExperiment(
        assays = list(counts = new_mat),
        rowData = DataFrame(symbol = genes),
        colData = colData(rse)
    )
 })

 ## We can now combine them into one
 rse <- do.call(cbind, rse_uniform_list)

 ## Add the "dataset" where each sample comes from
 rse$Dataset <- rep(datasets, sapply(rse_uniform_list, ncol))

 ## HumAge is really a categorical variable, so let's treat it as such
 rse$HumAge <- as.factor(rse$HumAge)

 ## Now we can use iSEE to interactively explore the merged data
 library("iSEE")
 iSEE(rse)
	## Related to https://docs.google.com/document/d/1uDdq0W9eAEnyPLf_gKJRKI8TGpujXIhGhaHBuLdmmRE/edit?usp=sharing
	## and Berto et al 2019 https://www.pnas.org/content/116/48/24334

	library("SummarizedExperiment")

	## The two data sets we'll use
	datasets <- c("NeuN", "OLIG2")

	## Read in data for both datasets and build a SummarizedExperiment object
	rse_list <- lapply(datasets, function(dataid) {
	## Read in the data
	counts <- read.table(paste0(dataid, "_Primates_AdjExp.txt"))
	pheno <- read.table(paste0(dataid, "_Primates_pheno.txt"))

	## Make a table for the gene information we have
	gene_info <- DataFrame(symbol = rownames(counts))

	## Build a RangedSummarizedExperiment object
	SummarizedExperiment(
	assays = list(counts = counts),
	rowData = gene_info,
	colData = pheno
	)
	})
	names(rse_list) <- datasets

	## Can't run due to different number of genes
	do.call(cbind, rse_list)

	## Find the unique genes across both datasets
	genes <- unique(unlist(lapply(rse_list, rownames)))
	genes
	# [1] 9037

	## Make it such that we have the same number of genes in each dataset
	rse_uniform_list <- lapply(rse_list, function(rse) {
	## Make an empty large matrix
	new_mat <- matrix(0, nrow = length(genes), ncol = ncol(rse))

	## Add the gene and sample names
	rownames(new_mat) <- genes
	colnames(new_mat) <- colnames(rse)

	## Find the position for our current genes among the merged set of genes
	m <- match(rownames(rse), genes)

	## Replace the 0s in the matrix with the actual counts/data we have
	new_mat[m, ] <- as.matrix(assays(rse)$counts)

	## Make a new SummarizedExperiment object
	SummarizedExperiment(
	assays = list(counts = new_mat),
	rowData = DataFrame(symbol = genes),
	colData = colData(rse)
	)
	})

	## We can now combine them into one
	rse <- do.call(cbind, rse_uniform_list)

	## Add the "dataset" where each sample comes from
	rse$Dataset <- rep(datasets, sapply(rse_uniform_list, ncol))

	## HumAge is really a categorical variable, so let's treat it as such
	rse$HumAge <- as.factor(rse$HumAge)

	## Now we can use iSEE to interactively explore the merged data
	library("iSEE")
	iSEE(rse)