Created
May 26, 2011 08:35
-
-
Save DarioS/992778 to your computer and use it in GitHub Desktop.
Plot RNA exon and junction counts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.plotValues <- function(data, plot.columns, y.range, y.lab, plot.type, cols, ord, main, | |
g.symbol, first.plot) | |
{ | |
if(plot.type == "line") | |
{ | |
matplot(data[, plot.columns, drop = FALSE], ylim = y.range, type = 'l', lty = 1, lwd = 2, | |
ylab = y.lab, col = cols, main = main, xaxt = 'n') | |
axis(1, 1:nrow(data), ord) | |
} else { | |
plot.values <- t(data[, plot.columns, drop = FALSE]) | |
if(ncol(plot.values) < 5) | |
bar.x.lim <- c(1, 15) | |
else | |
bar.x.lim <- c(1, ncol(plot.values) * (nrow(plot.values) + 1)) | |
barplot(plot.values, xlim = bar.x.lim, ylim = y.range, names.arg = ord, | |
ylab = y.lab, col = cols, beside = TRUE, main = main) | |
} | |
if(first.plot) | |
title(g.symbol, outer = TRUE) | |
} | |
.shiftLabels <- function(labels.x, labels.y, x.min, x.max, n.models) | |
{ | |
lab.space <- 0.01 * (x.max - x.min) | |
labels.x <- labels.x[, order(labels.x[2, , drop = FALSE]), drop = FALSE] | |
max.consec <- ifelse(n.models <= 4, 6, 3) | |
consec = 1 | |
for(index in 2:length(labels.y)) | |
{ | |
if(abs(labels.x[1, index] - labels.x[1, index - 1]) < lab.space && consec < max.consec) | |
{ | |
labels.y[index] <- labels.y[index - 1] - 0.03 | |
consec = consec + 1 | |
} else { | |
consec = 1 | |
} | |
} | |
list(labels.x, labels.y) | |
} | |
.findLimits <- function(scores, diffs) | |
{ | |
if(any(is.finite(scores))) | |
{ | |
if(is.null(diffs)) y.min <- 0 else y.min <- min(scores[is.finite(scores)]) | |
y.max <- max(scores[is.finite(scores)]) | |
if(y.min > 0) y.min <- 0 | |
if(y.max < 0) y.max <- 0 | |
} else { | |
y.min <- -50 | |
y.max <- 50 | |
} | |
c(y.min, y.max) | |
} | |
setGeneric("rnaPlot", function(counts, ...) | |
{standardGeneric("rnaPlot")}) | |
setMethod("rnaPlot", c("GRanges"), | |
function(counts, diffs = NULL, scale = c("none", "log", "sqrt"), genes = NULL, tx.db, cols = NULL, | |
plot.columns = NULL, plot.type = c("line", "bar"), y.lab = '', | |
plot.exons = TRUE, plot.intronic = TRUE, plot.junctions = TRUE, | |
verbose = TRUE) | |
{ | |
if(is.null(cols)) | |
stop("'cols' not given.") | |
if(is.null(plot.columns) && is.null(diffs)) | |
stop("Columns to use in plotting not given. Can only be NULL if 'diffs' is given.") | |
plot.type <- match.arg(plot.type) | |
scale <- match.arg(scale) | |
counts.df <- as.data.frame(counts) | |
if(!is.null(diffs)) | |
{ | |
diffs.scores <- lapply(diffs, function(diff) | |
{ | |
diff <- strsplit(diff, " {0, }- {0, }")[[1]] | |
use.counts <- counts.df[, diff] | |
if(scale == "log") use.counts <- log2(use.counts) else use.counts <- sqrt(use.counts) | |
use.counts[, 2] - use.counts[, 1] | |
}) | |
names(diffs.scores) <- diffs | |
counts.df <- cbind(counts.df, diffs.scores) | |
plot.columns <- match(diffs, colnames(counts.df)) | |
} else { | |
plot.columns <- plot.columns + 5 # Offset for coordinates columns. | |
} | |
if(!is.null(genes)) | |
counts.df <- counts.df[counts.df[, "gene"] %in% genes, ] | |
counts.df$pos <- (counts.df$start + counts.df$end) / 2 | |
counts.df <- counts.df[order(counts.df[, "pos"]), ] | |
genes.scores <- split(as.data.frame(counts.df), factor(counts.df$gene, levels = genes)) | |
if("gene.symbol" %in% colnames(counts.df)) | |
symbols <- counts.df[match(names(genes.scores), counts.df$gene), "gene.symbol"] | |
else | |
symbols <- names(genes.scores) | |
# Get transcripts of each gene. | |
gene.txs <- transcriptsBy(tx.db, "gene") | |
gene.txs <- gene.txs[names(genes.scores)] | |
# Get exons of each transcript. | |
tx.exons <- exonsBy(tx.db, "tx") | |
n.graphs <- sum(plot.exons || plot.junctions, plot.intronic) | |
g.row <- match(names(genes.scores), counts.df[, "gene"]) | |
strands <- counts.df[g.row, "strand"] | |
chrs <- counts.df[g.row, "seqnames"] | |
par(oma = c(3, 1, 2, 1)) | |
feat.used <- character() | |
if(plot.exons) feat.used <- c(feat.used, "exon") | |
if(plot.junctions) feat.used <- c(feat.used, "junction") | |
if(plot.intronic) feat.used <- c(feat.used, "intronic") | |
if(verbose) message("Plotting genes.") | |
for(g.index in 1:length(symbols)) | |
{ | |
curr.scores <- genes.scores[[g.index]] | |
curr.txs <- gene.txs[[g.index]] | |
models.frac <- 1/2 + min(length(curr.txs), 5) / 10 | |
par(mar = c(1, 4, 3, 1)) | |
layout(matrix(1:(2*(n.graphs + 1)), ncol = 2, byrow = TRUE), | |
heights = c(rep(1/n.graphs, n.graphs), models.frac), widths = c(7, 1)) | |
g.strand <- strands[g.index] | |
y.range <- .findLimits(as.matrix(curr.scores[curr.scores[, "type"] %in% feat.used, plot.columns]), diffs) | |
if(plot.exons || plot.junctions) | |
{ | |
if(plot.exons) | |
keep <- "exon" | |
if(plot.junctions) | |
keep <- c(keep, "junction") | |
tx.scores <- curr.scores[curr.scores[, "type"] %in% keep, ] | |
if(g.strand == '+') | |
ex.jct.ord <- 1:nrow(tx.scores) | |
else | |
ex.jct.ord <- nrow(tx.scores):1 | |
tx.IR <- IRanges(tx.scores[, "start"], tx.scores[, "end"]) | |
.plotValues(tx.scores, plot.columns, y.range, y.lab, plot.type, cols, ex.jct.ord, | |
"Exons and Junctions", symbols[g.index], TRUE) | |
# Put legend on the side | |
par(mar = c(1, 0, 3, 0)) | |
plot.new() | |
legend("top", colnames(counts.df[, plot.columns, drop = FALSE]), fill = cols) | |
} | |
if(plot.intronic) | |
{ | |
par(mar = c(1, 4, 3, 1)) | |
is.first <- ifelse(plot.exons || plot.junctions, FALSE, TRUE) | |
in.scores <- curr.scores[curr.scores[, "type"] %in% "intronic", ] | |
if(nrow(in.scores) > 0) | |
{ | |
if(g.strand == '+') in.ord <- 1:nrow(in.scores) else in.ord <- nrow(in.scores):1 | |
.plotValues(in.scores, plot.columns, y.range, y.lab, plot.type, cols, in.ord, | |
"Intronic Gaps", symbols[g.index], is.first) | |
} else { | |
plot.new() | |
plot.window(c(0, 1), c(0, 1)) | |
text(0.5, 0.5, "No intronic regions\nfor this gene.") | |
} | |
par(mar = c(1, 0, 3, 0)) | |
plot.new() | |
if(is.first) | |
legend("top", colnames(counts.df[, plot.columns, drop = FALSE]), fill = cols) | |
} | |
x.min <- min(start(curr.txs)) | |
x.max <- max(end(curr.txs)) | |
x.space <- 0.01 * (x.max - x.min) | |
x.min <- x.min - x.space | |
x.max <- x.max + x.space | |
x.ticks <- seq(x.min, x.max, floor((x.max - x.min) / 10)) | |
par(mar = c(1, 2, 0, 1)) | |
plot.new() | |
plot.window(xlim = c(x.min, x.max), ylim = c(0, models.frac)) | |
axis(1, at = x.ticks) | |
n.models <- length(curr.txs) * (plot.exons || plot.junctions) + plot.intronic | |
model.ys <- models.frac - 0.05 - (models.frac - 0.05) / n.models * 0:(n.models - 1) | |
tx.ys <- if(plot.intronic) model.ys[-length(model.ys)] else model.ys | |
curr.txs.df <- as.data.frame(curr.txs) | |
gene.disjoint.exons <- tx.scores[tx.scores[, "type"] == "exon", c("start", "end")] | |
disjoint.exons.IR <- IRanges(gene.disjoint.exons[, 1], gene.disjoint.exons[, 2]) | |
mapply(function(x, y) | |
{ | |
labels.x <- NULL | |
if(plot.exons) | |
{ | |
curr.exons <- ranges(tx.exons[[y]]) | |
curr.exons <- disjoint.exons.IR[queryHits(findOverlaps(disjoint.exons.IR, curr.exons, type = "within"))] | |
if(g.strand == '-') curr.exons <- rev(curr.exons) | |
ex.ord.id <- ex.jct.ord[intersect(subjectHits(findOverlaps(curr.exons, tx.IR, type = "equal")), which(tx.scores[, "type"] == "exon"))] | |
# Draw exon rectangles. | |
mapply(function(st, end) | |
{ | |
rect(st, x - 0.02, end, x + 0.02, col = "darkblue", lty = 0) | |
}, start(curr.exons), end(curr.exons)) | |
labels.x <- rbind((start(curr.exons) + end(curr.exons)) / 2, ex.ord.id) | |
} | |
# Draw junction lines and arrowheads. | |
if(plot.junctions) | |
{ | |
curr.juncts <- gaps(curr.exons) | |
if(length(curr.juncts) > 0) | |
{ | |
junct.ord.id <- ex.jct.ord[subjectHits(findOverlaps(curr.juncts, tx.IR, type = "equal"))] | |
mapply(function(st, end) | |
{ | |
middle <- (st + end) / 2 | |
lines(c(st, middle, middle, end), x + c(0.02, 0.04, 0.04, 0.02), col = "darkblue") | |
}, start(curr.juncts), end(curr.juncts)) | |
labels.x <- cbind(labels.x, rbind((start(curr.juncts) + end(curr.juncts)) / 2, junct.ord.id)) | |
} | |
} | |
labels.y <- rep(x - 0.04, ncol(labels.x)) | |
if(ncol(labels.x) > 1) | |
{ | |
xy.list <- .shiftLabels(labels.x, labels.y, x.min, x.max, n.models) | |
labels.x <- xy.list[[1]] | |
labels.y <- xy.list[[2]] | |
} | |
text(labels.x[1, ], labels.y, labels.x[2, ]) | |
}, tx.ys, curr.txs.df[, "tx_id"]) | |
if(plot.intronic) | |
{ | |
y.in <- model.ys[length(model.ys)] | |
if(nrow(in.scores) > 0) | |
{ | |
mapply(function(st, end, col) | |
{ | |
lines(c(st, end), rep(y.in, 2), col = col) | |
}, in.scores[, "start"], in.scores[, "end"], "darkblue") | |
labels.x <- rbind(in.scores[, "pos"], in.ord) | |
labels.y <- rep(y.in - 0.03, nrow(in.scores)) | |
if(ncol(labels.x) > 1) | |
{ | |
xy.list <- .shiftLabels(labels.x, labels.y, x.min, x.max, n.models) | |
labels.x <- xy.list[[1]] | |
labels.y <- xy.list[[2]] | |
} | |
text(labels.x[1, ], labels.y, labels.x[2, ]) | |
} else { | |
text(mean(c(x.min, x.max)), y.in, "No intronic regions\nfor this gene.") | |
} | |
} | |
mtext(chrs[g.index], 1, 3) | |
par(mar = c(1, 1, 0, 0)) | |
plot.new() | |
plot.window(xlim = c(0, 1), ylim = c(0, models.frac)) | |
text(0.5, model.ys, c(as.character(curr.txs.df[, "tx_name"]), if(plot.intronic) "Intronic Gaps")) | |
} | |
}) |
23 August: Now handles the case of disjoint exons in the count object.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
22 August : The user gives the plot y-label now. Trying to auto-generate in became too hard.