Skip to content

Instantly share code, notes, and snippets.

@vjcitn
Created August 6, 2024 16:56
Show Gist options
  • Save vjcitn/a734e11caa7b554f5155ade8170de523 to your computer and use it in GitHub Desktop.
Save vjcitn/a734e11caa7b554f5155ade8170de523 to your computer and use it in GitHub Desktop.
simple collection of matrix.mtx and parquet assets from Xenium exemplary dataset
# retrieve these from https://mghp.osn.xsede.org/bir190004-bucket01/BiocXenData/
#-rw-r--r-- 1 exouser exouser 3300521324 Mar 20 21:17 transcripts.parquet
#-rw-r--r-- 1 exouser exouser 68454210 Mar 20 21:16 nucleus_boundaries.parquet
#-rw-r--r-- 1 exouser exouser 73791358 Mar 20 21:15 cell_boundaries.parquet
#-rw-r--r-- 1 exouser exouser 242459483 Apr 10 03:46 cell_feature_matrix.tar.gz
#-rw-r--r-- 1 exouser exouser 44907408 Mar 20 21:15 cells.csv.gz
# tar zxf cell_feature_matrix.tar.gz to obtain folder cell_feature_matrix
library(Matrix)
library(SingleCellExperiment)
library(ParquetDataFrame) # from github.com/LTLA/ParquetDataFrame
# Read the sparse count matrix together with its barcode (column) and
# feature (row) tables from the unpacked cell_feature_matrix folder, plus the
# per-cell metadata table.
counts = readMM("cell_feature_matrix/matrix.mtx.gz")
barc = read.delim("cell_feature_matrix/barcodes.tsv.gz", sep = "\t", header = FALSE)
fea = read.delim("cell_feature_matrix/features.tsv.gz", header = FALSE, sep = "\t")
cellmeta = read.csv("cells.csv.gz")

# Fail early, with a readable message, if the side tables do not line up
# with the matrix dimensions.
stopifnot(
  nrow(fea) == nrow(counts),
  nrow(barc) == ncol(counts),
  nrow(cellmeta) == ncol(counts)
)

rownames(counts) = fea$V1
colnames(counts) = barc$V1
sce = SingleCellExperiment(assays = SimpleList(counts = counts))

colnames(fea) = c("ensid", "symbol", "type")
rowData(sce) = DataFrame(fea)

# The column names of a SummarizedExperiment live in rownames(colData(.)).
# Replacing colData with a table that has no row names would therefore drop
# the barcodes assigned above, so carry them over explicitly.
# NOTE(review): like the original positional assignment, this assumes the rows
# of cells.csv.gz are in the same order as barcodes.tsv.gz -- confirm.
cellmeta_df = DataFrame(cellmeta)
rownames(cellmeta_df) = colnames(counts)
colData(sce) = cellmeta_df

# Parquet-backed tables are only referenced here, via ParquetDataFrame.
tx = ParquetDataFrame("transcripts.parquet")
cellb = ParquetDataFrame("cell_boundaries.parquet")
nucb = ParquetDataFrame("nucleus_boundaries.parquet")
#' XenSCE class
#'
#' A SingleCellExperiment extended with three ParquetDataFrame slots.
#'
#' @slot cellbounds ParquetDataFrame returned by `getCellBoundaries`
#' @slot transcripts ParquetDataFrame returned by `getTranscripts`
#' @slot nucbounds ParquetDataFrame returned by `getNucleusBoundaries`
setClass(
  "XenSCE",
  contains = "SingleCellExperiment",
  slots = c(
    cellbounds = "ParquetDataFrame",
    transcripts = "ParquetDataFrame",
    nucbounds = "ParquetDataFrame"
  )
)
#' show method for XenSCE
#'
#' Prints the inherited display first, then a table with the dimensions of
#' the three Parquet-backed slots as computed by `xdims`.
#'
#' @param object a XenSCE instance
setMethod("show", "XenSCE", function(object) {
  # Delegate the standard part of the display to the parent class method
  callNextMethod()
  cat("Parquet elements:\n")
  parquet_dims = xdims(object)
  print(parquet_dims)
})
#' helper function for XenSCE show method
#'
#' Collects `dim()` of the slots named "transcripts", "cellbounds" and
#' "nucbounds" into a small table.
#'
#' @param x an S4 object that has slots `transcripts`, `cellbounds` and
#'   `nucbounds`, each holding a two-dimensional object
#' @return a data.frame with one row per slot (row names are the slot names)
#'   and columns `nrow` and `ncol`
xdims = function (x)
{
  slot_names = c("transcripts", "cellbounds", "nucbounds")
  names(slot_names) = slot_names
  # lapply keeps one entry per slot regardless of what dim() returns;
  # sapply would silently change its result shape on irregular input.
  dims = lapply(slot_names, function(z) dim(slot(x, z)))
  two_d = vapply(dims, function(d) length(d) == 2L, logical(1))
  if (!all(two_d)) {
    stop("slot(s) without exactly two dimensions: ",
         paste(names(dims)[!two_d], collapse = ", "), call. = FALSE)
  }
  # Row-bind directly: one row per slot, row names taken from the list names
  ans = do.call(rbind, dims)
  colnames(ans) = c("nrow", "ncol")
  data.frame(ans)
}
#' Extract the transcripts table
#'
#' Generic plus the XenSCE method, which returns the content of the
#' `transcripts` slot unchanged.
#'
#' @param x a XenSCE instance
#' @return the object stored in the `transcripts` slot
#' @export
setGeneric(
  "getTranscripts",
  function(x) standardGeneric("getTranscripts")
)
setMethod("getTranscripts", "XenSCE", function(x) {
  x@transcripts
})
#' Extract the cell boundaries table
#'
#' Generic plus the XenSCE method, which returns the content of the
#' `cellbounds` slot unchanged.
#'
#' @param x a XenSCE instance
#' @return the object stored in the `cellbounds` slot
#' @export
setGeneric(
  "getCellBoundaries",
  function(x) standardGeneric("getCellBoundaries")
)
setMethod("getCellBoundaries", "XenSCE", function(x) {
  x@cellbounds
})
#' Extract the nucleus boundaries table
#'
#' Generic plus the XenSCE method, which returns the content of the
#' `nucbounds` slot unchanged.
#'
#' @param x a XenSCE instance
#' @return the object stored in the `nucbounds` slot
#' @export
setGeneric(
  "getNucleusBoundaries",
  function(x) standardGeneric("getNucleusBoundaries")
)
setMethod("getNucleusBoundaries", "XenSCE", function(x) {
  x@nucbounds
})
# Assemble the XenSCE: `sce` supplies the inherited SingleCellExperiment part,
# and the three ParquetDataFrame objects fill the additional slots.
myxen = new(
  "XenSCE",
  sce,
  cellbounds = cellb,
  nucbounds = nucb,
  transcripts = tx
)
# Auto-print at top level, which dispatches to the XenSCE show method
myxen
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment