Skip to content

Instantly share code, notes, and snippets.

@DarioS
Created August 22, 2011 06:07
Show Gist options
  • Save DarioS/1161765 to your computer and use it in GitHub Desktop.
Save DarioS/1161765 to your computer and use it in GitHub Desktop.
Convert feature counts into RPKM counts per gene
setGeneric("calcFPKMs", function(counts, ...) {standardGeneric("calcFPKMs")})
setMethod("calcFPKMs", c("GRanges"),
function(counts, verbose = TRUE)
{
counts.df <- as.data.frame(counts)
counts.cols <- metadata(counts)[["counts.cols"]] + 5
# Only use read counts from the known transcriptome.
counts.df <- counts.df[counts.df[, "type"] %in% c("exon", "junction"), ]
gene.id <- counts.df[, "gene"]
gene.loc.expr <- by(counts.df, gene.id, function(x)
{
exons <- x[x[, "type"] == "exon", ]
gene.start <- min(x[, "start"])
gene.end <- max(x[, "end"])
exonic.bases <- sum(exons[, "end"] - exons[, "start"])
list(c(gene.start, gene.end),
colSums(x[, counts.cols]) / (exonic.bases / 1000) / (metadata(counts)[["totals"]] / 1000000))
})
genes.FPKMs <- do.call(rbind, lapply(gene.loc.expr, "[[", 2))
gene.locs <- do.call(rbind, lapply(gene.loc.expr, "[[", 1))
colnames(genes.FPKMs) <- paste(colnames(genes.FPKMs), "FPKM")
gene.match <- match(rownames(genes.FPKMs), gene.id)
gene.chr.strand <- counts.df[gene.match, c("seqnames", "strand")]
result <- GRanges(gene.chr.strand[, "seqnames"], IRanges(gene.locs[, 1], gene.locs[, 2]), gene.chr.strand[, "strand"])
if("gene.symbol" %in% colnames(counts.df))
{
values(result) <- DataFrame(gene = rownames(genes.FPKMs), gene.symbol = counts.df[gene.match, "gene.symbol"], genes.FPKMs)
} else {
values(result) <- DataFrame(gene = rownames(genes.FPKMs), genes.FPKMs)
}
result
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment