Skip to content

Instantly share code, notes, and snippets.

@davebridges
Created November 21, 2012 14:15
Show Gist options
  • Save davebridges/4125043 to your computer and use it in GitHub Desktop.
Save davebridges/4125043 to your computer and use it in GitHub Desktop.
Generate Exon and Transcript Counts Table
require(GenomicFeatures)
require(biomaRt)
#set this to where your tophat_out directory is.
working_directory = "/some_directory/"
setwd(working_directory)
#make a database of transcripts from the ensembl assembly
#first get the current release 69 from ensembl, change this to your species if not using mice
txdb <- makeTranscriptDbFromBiomart(biomart="ensembl",dataset = 'mmusculus_gene_ensembl')
#make exon and transcript annotation objects
save(txdb, file="txdb.Robject")
exons <- exons(txdb, columns=c('gene_id', 'tx_id', 'tx_name', 'tx_chrom', 'tx_strand', 'exon_id', 'exon_name', 'cds_id', 'cds_name', 'cds_chrom', 'cds_strand', 'exon_rank'))
save(exons, file="exons.Robject")
transcripts <- transcripts(txdb, columns=c('gene_id', 'tx_id', 'tx_name', 'exon_id', 'exon_name', 'exon_chrom', 'exon_strand', 'cds_id', 'cds_name', 'cds_chrom', 'cds_strand', 'exon_rank'))
require(GenomicRanges)
require(Rsamtools)
#set list of sample ids as a vector
sample_ids = seq(12849,12858)
transcript.countsTable <- data.frame(
row.names = as.vector(unlist(elementMetadata(transcripts)['tx_name'])))
exon.countsTable <- data.frame(
row.names = as.vector(unlist(elementMetadata(exons)['exon_name'])))
#this forloop iterates over the sample_ids and generates exon and transcript counts for each sample_id
for(sample_id in sample_ids) {
#read alignment
align <- readBamGappedAlignments(sprintf("tophat_out/Sample_%s/accepted_hits.bam", sample_id))
#count the overlapping reads for the transcripts
transcript.counts <- countOverlaps(transcripts, align)
#reassign to a specific transcript.counts object.
assign(sprintf("transcript.counts.%s", sample_id), transcript.counts)
#add this column to the countsTable
transcript.countsTable <- cbind(transcript.countsTable,transcript.counts)
remove(transcript.counts)
#count the overlapping reads for the exons
exon.counts <- countOverlaps(exons, align)
#reassign to a specific transcript.counts object.
assign(sprintf("exon.counts.%s", sample_id), exon.counts)
#add this column to the countsTable
exon.countsTable <- cbind(exon.countsTable, exon.counts)
#remove the align, transcript.counts and exon.counts objects for the next loop
remove(align)
remove(transcript.counts)
remove(exon.counts)
}
summary(transcriptCountsTable)
summary(exonCountsTable)
#write these two counts tables to csv files.
write.csv(transcriptCountsTable, "transcript_counts_table.csv")
write.csv(exonCountsTable, "exon_counts_table.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment