cwhelan · November 20, 2012 21:02
diff --git a/gistfile1.r b/gistfile1.r
 library(GenomicRanges)                                                                           
 library(rtracklayer)  
 library(bsseq)
 library(Homo.sapiens)

 # methylation summaries are data frames with columns, "chr", "start", "strand", "meth", "unmeth"

 # get them all in one data frame
 mergedSummary <- merge(normalMethylationSummary, cancerMethylationSummary,
                       by=c("chr", "start", "strand", "end"),
                       suffixes=c("normal","cancer"))

 # build the bsseq object with methylation (M) and coverage (C) matrices
 bs.fit <- BSseq(M=as.matrix(mergedSummary[,c("methnormal","methcancer")]),
                C=as.matrix(cbind(mergedSummary[,"methnormal"] + mergedSummary[,"unmethnormal"],
                                  mergedSummary[,"methcancer"] + mergedSummary[,"unmethcancer"])),
                pos=mergedSummary$start,
                chr=mergedSummary$chr,
                sampleNames=c("normal","cancer"))

 # need to order the BSseq object
 bs.fit <- orderBSseq(bs.fit)

 # compute the smoothed fit in parallel by chromosome using 4 cores (takes a while)
 bs.fit <- BSmooth(bs.fit, mc.cores = 4, verbose = TRUE, parallelBy = "chromosome")

 # load breakpoints
 bpAnchoringRegions <- import.bed('LC3961_vs_41_stringent_high_score_anchoring_regions.bed', asRangedData=FALSE)

 # find AML Genes (geneMaxRanges is contains the max start and end of all transcripts for a gene, precomputed)
 amlGeneSymbols <- c("ZBTB20", "CAV2", "NBEAL1", "CTDSPL", "WIF1", "SFRP1", "SFRP2", "SFRP4", "SFRP5", "DKK1", "PRDM16", "CDKN2B", "CDKN2A", "WT1", "DNMT3A", "DNMT3L", "DNMT3B", "DNMT1", "CEBPA",\
 "CDH1", "RUNX1")
 amlGeneIds <-  select(Homo.sapiens, keys=amlGeneSymbols, cols=c("ENTREZID"), keytype="SYMBOL")$ENTREZID
 amlGenes <- geneMaxRanges[amlGeneIds]

 # load other features
 cpgIslands <- import.gff('/u0/dbase/genomes/human/features/human_cpgislands.gff', asRangedData=FALSE)
 repmask <- import.gff3('/u0/dbase/genomes/human/features/human_repmask.gff', asRangedData=FALSE)
 repsByClass <- split(repmask, as.factor(mcols(repmask)$repClass))

 # build an annotations object
 annotations <- c(list(breakpoints=bpAnchoringRegions, exons=exons(hg19UCSCGenes),
                    "CpG islands"=cpgIslands),
                 as.list(repsByClass[c("LINE","SINE","Satellite","Simple_repeat")]))

 # set the colors for each sample
 pData <- pData(bs.fit)
 pData$col <- c("blue", "red")
 pData(bs.fit) <- pData

 # plot the smoothed values for each of the AML gene regions
 pdf('amlGenesSmoothed.pdf', width=10, height=9)
 plotManyRegions(bs.fit, unlist(amlGenes), extend=5000, main=amlGeneSymbols,
                annoTrack=annotations,
                )
 dev.off()
	library(GenomicRanges)
	library(rtracklayer)
	library(bsseq)
	library(Homo.sapiens)

	# methylation summaries are data frames with columns, "chr", "start", "strand", "meth", "unmeth"

	# get them all in one data frame
	mergedSummary <- merge(normalMethylationSummary, cancerMethylationSummary,
	by=c("chr", "start", "strand", "end"),
	suffixes=c("normal","cancer"))

	# build the bsseq object with methylation (M) and coverage (C) matrices
	bs.fit <- BSseq(M=as.matrix(mergedSummary[,c("methnormal","methcancer")]),
	C=as.matrix(cbind(mergedSummary[,"methnormal"] + mergedSummary[,"unmethnormal"],
	mergedSummary[,"methcancer"] + mergedSummary[,"unmethcancer"])),
	pos=mergedSummary$start,
	chr=mergedSummary$chr,
	sampleNames=c("normal","cancer"))

	# need to order the BSseq object
	bs.fit <- orderBSseq(bs.fit)

	# compute the smoothed fit in parallel by chromosome using 4 cores (takes a while)
	bs.fit <- BSmooth(bs.fit, mc.cores = 4, verbose = TRUE, parallelBy = "chromosome")

	# load breakpoints
	bpAnchoringRegions <- import.bed('LC3961_vs_41_stringent_high_score_anchoring_regions.bed', asRangedData=FALSE)

	# find AML Genes (geneMaxRanges is contains the max start and end of all transcripts for a gene, precomputed)
	amlGeneSymbols <- c("ZBTB20", "CAV2", "NBEAL1", "CTDSPL", "WIF1", "SFRP1", "SFRP2", "SFRP4", "SFRP5", "DKK1", "PRDM16", "CDKN2B", "CDKN2A", "WT1", "DNMT3A", "DNMT3L", "DNMT3B", "DNMT1", "CEBPA",\
	"CDH1", "RUNX1")
	amlGeneIds <- select(Homo.sapiens, keys=amlGeneSymbols, cols=c("ENTREZID"), keytype="SYMBOL")$ENTREZID
	amlGenes <- geneMaxRanges[amlGeneIds]

	# load other features
	cpgIslands <- import.gff('/u0/dbase/genomes/human/features/human_cpgislands.gff', asRangedData=FALSE)
	repmask <- import.gff3('/u0/dbase/genomes/human/features/human_repmask.gff', asRangedData=FALSE)
	repsByClass <- split(repmask, as.factor(mcols(repmask)$repClass))

	# build an annotations object
	annotations <- c(list(breakpoints=bpAnchoringRegions, exons=exons(hg19UCSCGenes),
	"CpG islands"=cpgIslands),
	as.list(repsByClass[c("LINE","SINE","Satellite","Simple_repeat")]))

	# set the colors for each sample
	pData <- pData(bs.fit)
	pData$col <- c("blue", "red")
	pData(bs.fit) <- pData

	# plot the smoothed values for each of the AML gene regions
	pdf('amlGenesSmoothed.pdf', width=10, height=9)
	plotManyRegions(bs.fit, unlist(amlGenes), extend=5000, main=amlGeneSymbols,
	annoTrack=annotations,
	)
	dev.off()