Created
November 20, 2012 21:02
-
-
Save cwhelan/4121097 to your computer and use it in GitHub Desktop.
Using BSSmooth / bsseq to plot methylation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(GenomicRanges) | |
library(rtracklayer) | |
library(bsseq) | |
library(Homo.sapiens) | |
# methylation summaries are data frames with columns, "chr", "start", "strand", "meth", "unmeth" | |
# get them all in one data frame | |
mergedSummary <- merge(normalMethylationSummary, cancerMethylationSummary, | |
by=c("chr", "start", "strand", "end"), | |
suffixes=c("normal","cancer")) | |
# build the bsseq object with methylation (M) and coverage (C) matrices | |
bs.fit <- BSseq(M=as.matrix(mergedSummary[,c("methnormal","methcancer")]), | |
C=as.matrix(cbind(mergedSummary[,"methnormal"] + mergedSummary[,"unmethnormal"], | |
mergedSummary[,"methcancer"] + mergedSummary[,"unmethcancer"])), | |
pos=mergedSummary$start, | |
chr=mergedSummary$chr, | |
sampleNames=c("normal","cancer")) | |
# need to order the BSseq object | |
bs.fit <- orderBSseq(bs.fit) | |
# compute the smoothed fit in parallel by chromosome using 4 cores (takes a while) | |
bs.fit <- BSmooth(bs.fit, mc.cores = 4, verbose = TRUE, parallelBy = "chromosome") | |
# load breakpoints | |
bpAnchoringRegions <- import.bed('LC3961_vs_41_stringent_high_score_anchoring_regions.bed', asRangedData=FALSE) | |
# find AML Genes (geneMaxRanges is contains the max start and end of all transcripts for a gene, precomputed) | |
amlGeneSymbols <- c("ZBTB20", "CAV2", "NBEAL1", "CTDSPL", "WIF1", "SFRP1", "SFRP2", "SFRP4", "SFRP5", "DKK1", "PRDM16", "CDKN2B", "CDKN2A", "WT1", "DNMT3A", "DNMT3L", "DNMT3B", "DNMT1", "CEBPA",\ | |
"CDH1", "RUNX1") | |
amlGeneIds <- select(Homo.sapiens, keys=amlGeneSymbols, cols=c("ENTREZID"), keytype="SYMBOL")$ENTREZID | |
amlGenes <- geneMaxRanges[amlGeneIds] | |
# load other features | |
cpgIslands <- import.gff('/u0/dbase/genomes/human/features/human_cpgislands.gff', asRangedData=FALSE) | |
repmask <- import.gff3('/u0/dbase/genomes/human/features/human_repmask.gff', asRangedData=FALSE) | |
repsByClass <- split(repmask, as.factor(mcols(repmask)$repClass)) | |
# build an annotations object | |
annotations <- c(list(breakpoints=bpAnchoringRegions, exons=exons(hg19UCSCGenes), | |
"CpG islands"=cpgIslands), | |
as.list(repsByClass[c("LINE","SINE","Satellite","Simple_repeat")])) | |
# set the colors for each sample | |
pData <- pData(bs.fit) | |
pData$col <- c("blue", "red") | |
pData(bs.fit) <- pData | |
# plot the smoothed values for each of the AML gene regions | |
pdf('amlGenesSmoothed.pdf', width=10, height=9) | |
plotManyRegions(bs.fit, unlist(amlGenes), extend=5000, main=amlGeneSymbols, | |
annoTrack=annotations, | |
) | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment