vjcitn · August 6, 2024 16:56
diff --git a/XenSCE.R b/XenSCE.R
 # retrieve these from https://mghp.osn.xsede.org/bir190004-bucket01/BiocXenData/
 #-rw-r--r-- 1 exouser exouser 3300521324 Mar 20 21:17 transcripts.parquet
 #-rw-r--r-- 1 exouser exouser   68454210 Mar 20 21:16 nucleus_boundaries.parquet
 #-rw-r--r-- 1 exouser exouser   73791358 Mar 20 21:15 cell_boundaries.parquet
 #-rw-r--r-- 1 exouser exouser 242459483 Apr 10 03:46 cell_feature_matrix.tar.gz
 #-rw-r--r-- 1 exouser exouser 44907408 Mar 20 21:15 cells.csv.gz

 # tar zxf cell_feature_matrix.tar.gz to obtain folder cell_feature_matrix

 library(Matrix)
 library(SingleCellExperiment)
 library(ParquetDataFrame) # from github.com/LTLA/ParquetDataFrame

 # Read the count matrix together with its barcode and feature tables.
 # Rows are labelled with the first feature column, columns with the barcodes.
 # `header` is spelled out in full instead of relying on partial matching.
 counts <- readMM("cell_feature_matrix/matrix.mtx.gz")
 barc <- read.delim("cell_feature_matrix/barcodes.tsv.gz", sep = "\t",
                    header = FALSE)
 fea <- read.delim("cell_feature_matrix/features.tsv.gz", header = FALSE,
                   sep = "\t")
 cellmeta <- read.csv("cells.csv.gz")
 rownames(counts) <- fea$V1
 colnames(counts) <- barc$V1

 # Wrap the counts in a SingleCellExperiment and attach the feature table as
 # rowData and the per-cell table as colData.
 sce <- SingleCellExperiment(assays = SimpleList(counts = counts))
 colnames(fea) <- c("ensid", "symbol", "type")
 rowData(sce) <- DataFrame(fea)
 colData(sce) <- DataFrame(cellmeta)
 # NOTE(review): re-apply the barcodes as column names, since the replacement
 # colData built from cells.csv.gz carries no barcode row names and may reset
 # them -- confirm against SummarizedExperiment's `colData<-` behaviour.
 colnames(sce) <- barc$V1

 # Parquet-backed tables for transcripts and cell/nucleus boundaries.
 tx <- ParquetDataFrame("transcripts.parquet")
 cellb <- ParquetDataFrame("cell_boundaries.parquet")
 nucb <- ParquetDataFrame("nucleus_boundaries.parquet")

 # S4 class XenSCE: a SingleCellExperiment extended with three
 # ParquetDataFrame slots named cellbounds, transcripts and nucbounds.
 setClass(
   "XenSCE",
   slots = c(
     cellbounds = "ParquetDataFrame",
     transcripts = "ParquetDataFrame",
     nucbounds = "ParquetDataFrame"
   ),
   contains = "SingleCellExperiment"
 )
 # Display method for XenSCE: emit the inherited show() output first, then a
 # heading followed by the table produced by xdims() for the parquet slots.
 setMethod(
   "show",
   "XenSCE",
   function(object) {
     callNextMethod()
     cat("Parquet elements:\n")
     parquet_dims <- xdims(object)
     print(parquet_dims)
   }
 )


 #' Helper for the XenSCE show method: tabulate slot dimensions
 #'
 #' @param x An S4 object with `transcripts`, `cellbounds` and `nucbounds`
 #'   slots, each of which supports `dim()`.
 #' @return A data.frame with one row per slot (row names are the slot names)
 #'   and columns `nrow` and `ncol`.
 xdims <- function(x) {
   slot_names <- c("transcripts", "cellbounds", "nucbounds")
   # Bind the per-slot dim() vectors row-wise instead of relying on sapply()
   # simplification, whose result shape varies with its input.
   slot_dims <- lapply(slot_names, function(z) dim(slot(x, z)))
   ans <- do.call(rbind, slot_dims)
   dimnames(ans) <- list(slot_names, c("nrow", "ncol"))
   data.frame(ans)
 }

 #' Accessor returning the `transcripts` slot of a XenSCE object
 #' @param x A XenSCE instance.
 #' @return The value stored in the `transcripts` slot.
 #' @export
 setGeneric(
   "getTranscripts",
   function(x) standardGeneric("getTranscripts")
 )
 setMethod(
   "getTranscripts",
   "XenSCE",
   function(x) x@transcripts
 )

 #' Accessor returning the `cellbounds` slot of a XenSCE object
 #' @param x A XenSCE instance.
 #' @return The value stored in the `cellbounds` slot.
 #' @export
 setGeneric(
   "getCellBoundaries",
   function(x) standardGeneric("getCellBoundaries")
 )
 setMethod(
   "getCellBoundaries",
   "XenSCE",
   function(x) x@cellbounds
 )

 #' Accessor returning the `nucbounds` slot of a XenSCE object
 #' @param x A XenSCE instance.
 #' @return The value stored in the `nucbounds` slot.
 #' @export
 setGeneric(
   "getNucleusBoundaries",
   function(x) standardGeneric("getNucleusBoundaries")
 )
 setMethod(
   "getNucleusBoundaries",
   "XenSCE",
   function(x) x@nucbounds
 )

 # Build the XenSCE instance from the SingleCellExperiment plus the three
 # parquet-backed tables, then display it at top level.
 myxen <- new(
   "XenSCE",
   sce,
   cellbounds = cellb,
   nucbounds = nucb,
   transcripts = tx
 )

 myxen
	# retrieve these from https://mghp.osn.xsede.org/bir190004-bucket01/BiocXenData/
	#-rw-r--r-- 1 exouser exouser 3300521324 Mar 20 21:17 transcripts.parquet
	#-rw-r--r-- 1 exouser exouser 68454210 Mar 20 21:16 nucleus_boundaries.parquet
	#-rw-r--r-- 1 exouser exouser 73791358 Mar 20 21:15 cell_boundaries.parquet
	#-rw-r--r-- 1 exouser exouser 242459483 Apr 10 03:46 cell_feature_matrix.tar.gz
	#-rw-r--r-- 1 exouser exouser 44907408 Mar 20 21:15 cells.csv.gz

	# tar zxf cell_feature_matrix.tar.gz to obtain folder cell_feature_matrix

	library(Matrix)
	library(SingleCellExperiment)
	library(ParquetDataFrame) # from github.com/LTLA/ParquetDataFrame

	# Read the count matrix together with its barcode and feature tables.
	# Rows are labelled with the first feature column, columns with the barcodes.
	# `header` is spelled out in full instead of relying on partial matching.
	counts <- readMM("cell_feature_matrix/matrix.mtx.gz")
	barc <- read.delim("cell_feature_matrix/barcodes.tsv.gz", sep = "\t",
	                   header = FALSE)
	fea <- read.delim("cell_feature_matrix/features.tsv.gz", header = FALSE,
	                  sep = "\t")
	cellmeta <- read.csv("cells.csv.gz")
	rownames(counts) <- fea$V1
	colnames(counts) <- barc$V1

	# Wrap the counts in a SingleCellExperiment and attach the feature table as
	# rowData and the per-cell table as colData.
	sce <- SingleCellExperiment(assays = SimpleList(counts = counts))
	colnames(fea) <- c("ensid", "symbol", "type")
	rowData(sce) <- DataFrame(fea)
	colData(sce) <- DataFrame(cellmeta)
	# NOTE(review): re-apply the barcodes as column names, since the replacement
	# colData built from cells.csv.gz carries no barcode row names and may reset
	# them -- confirm against SummarizedExperiment's `colData<-` behaviour.
	colnames(sce) <- barc$V1

	# Parquet-backed tables for transcripts and cell/nucleus boundaries.
	tx <- ParquetDataFrame("transcripts.parquet")
	cellb <- ParquetDataFrame("cell_boundaries.parquet")
	nucb <- ParquetDataFrame("nucleus_boundaries.parquet")

	# S4 class XenSCE: a SingleCellExperiment extended with three
	# ParquetDataFrame slots named cellbounds, transcripts and nucbounds.
	setClass(
	  "XenSCE",
	  slots = c(
	    cellbounds = "ParquetDataFrame",
	    transcripts = "ParquetDataFrame",
	    nucbounds = "ParquetDataFrame"
	  ),
	  contains = "SingleCellExperiment"
	)
	# Display method for XenSCE: emit the inherited show() output first, then a
	# heading followed by the table produced by xdims() for the parquet slots.
	setMethod(
	  "show",
	  "XenSCE",
	  function(object) {
	    callNextMethod()
	    cat("Parquet elements:\n")
	    parquet_dims <- xdims(object)
	    print(parquet_dims)
	  }
	)


	#' Helper for the XenSCE show method: tabulate slot dimensions
	#'
	#' @param x An S4 object with `transcripts`, `cellbounds` and `nucbounds`
	#'   slots, each of which supports `dim()`.
	#' @return A data.frame with one row per slot (row names are the slot names)
	#'   and columns `nrow` and `ncol`.
	xdims <- function(x) {
	  slot_names <- c("transcripts", "cellbounds", "nucbounds")
	  # Bind the per-slot dim() vectors row-wise instead of relying on sapply()
	  # simplification, whose result shape varies with its input.
	  slot_dims <- lapply(slot_names, function(z) dim(slot(x, z)))
	  ans <- do.call(rbind, slot_dims)
	  dimnames(ans) <- list(slot_names, c("nrow", "ncol"))
	  data.frame(ans)
	}

	#' Accessor returning the `transcripts` slot of a XenSCE object
	#' @param x A XenSCE instance.
	#' @return The value stored in the `transcripts` slot.
	#' @export
	setGeneric(
	  "getTranscripts",
	  function(x) standardGeneric("getTranscripts")
	)
	setMethod(
	  "getTranscripts",
	  "XenSCE",
	  function(x) x@transcripts
	)

	#' Accessor returning the `cellbounds` slot of a XenSCE object
	#' @param x A XenSCE instance.
	#' @return The value stored in the `cellbounds` slot.
	#' @export
	setGeneric(
	  "getCellBoundaries",
	  function(x) standardGeneric("getCellBoundaries")
	)
	setMethod(
	  "getCellBoundaries",
	  "XenSCE",
	  function(x) x@cellbounds
	)

	#' Accessor returning the `nucbounds` slot of a XenSCE object
	#' @param x A XenSCE instance.
	#' @return The value stored in the `nucbounds` slot.
	#' @export
	setGeneric(
	  "getNucleusBoundaries",
	  function(x) standardGeneric("getNucleusBoundaries")
	)
	setMethod(
	  "getNucleusBoundaries",
	  "XenSCE",
	  function(x) x@nucbounds
	)

	# Build the XenSCE instance from the SingleCellExperiment plus the three
	# parquet-backed tables, then display it at top level.
	myxen <- new(
	  "XenSCE",
	  sce,
	  cellbounds = cellb,
	  nucbounds = nucb,
	  transcripts = tx
	)

	myxen