Last active
August 29, 2015 14:08
-
-
Save brian-bot/3f2b74694f58404c93fd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
runAttractorInCRCSC <- function(projectId){ | |
require(synapseClient) | |
thisFile <- "https://gist.github.com/brian-bot/3f2b74694f58404c93fd" | |
## PULL FROM THE LINK THAT WAS CREATED IN THE PROJECT AT AN EARLIER STAGE | |
findLink <- synapseQuery(paste('SELECT id, name FROM link WHERE benefactorId=="', projectId, '"')) | |
resolveLink <- synGet(findLink$link.id[findLink$link.name=="COAD.attractors.rnaseq.txt"][1]) | |
## GET THE ATTRACTOR METAGENES FOR TCGA COAD FROM THE LINK | |
message("... reading in attractor metagenes ...") | |
synAmgCoad <- synGet(unlist(resolveLink$properties$linksTo)) | |
amgCoad <- read.delim(getFileLocation(synAmgCoad), as.is=T) | |
attractorGenes <- sapply(strsplit(amgCoad$Attractor001.Genes, "|", fixed=T), "[[", 1) | |
## GRAB THE CURATED RNA-SEQ EXPRESSION DATA ACROSS TCGA COAD AND READ | |
message("... downloading expression matrix from CRCSC ...") | |
crcExpr <- synGet("syn2326100") | |
message("... reading in large expression matrix from CRCSC (this might take a minute) ...") | |
crc <- read.delim(getFileLocation(crcExpr), as.is=T, row.names=1, check.names=FALSE) | |
crc <- crc[apply(crc, 1, sd) !=0, ] | |
## SUBSET TO ONLY ATTRACTORS PULLED FROM COAD | |
message("... computing the top attractor in CRCSC ...") | |
crcAm <- colMeans(crc[attractorGenes, ]) | |
## LOOK AT RANK-BASED CORRELATION (SPEARMAN) | |
message("... calculating spearman correlation with all genes in CRCSC dataset ...") | |
amCorr <- apply(crc, 1, function(x){ | |
cor(x, crcAm, method = "spearman") | |
}) | |
amCorr <- data.frame(gene=rownames(crc), spearmanRho=amCorr, stringsAsFactors = FALSE) | |
amCorr <- amCorr[order(amCorr$spearmanRho), ] | |
## GRAB GENE WITH THE MOST NEGATIVE CORRELATION | |
thisGene <- amCorr$gene[which(amCorr$spearmanRho==min(amCorr$spearmanRho, na.rm = TRUE))] | |
## WRITE OUT TO FILE TO STORE IN SYNAPSE | |
fileName <- file.path(tempdir(), "coadAttractorMetagene-correlationCRCSC.tsv") | |
write.table(amCorr, file=fileName, quote=FALSE, sep="\t", row.names=FALSE) | |
## FIND FOLDER TO STORE RESULTS | |
fq <- synapseQuery(paste('SELECT id, name FROM folder WHERE benefactorId=="', projectId, '"')) | |
## CREATE PROVENANCE: DESCRIBE WHAT WAS USED TO GENERATE THE RESULT | |
message("... recording provenance for this analysis ...") | |
act <- synStore(Activity(name="COAD Attractor Metagene correlation with CRCSC TCGA", | |
used=list(list(entity=synAmgCoad, wasExecuted=F), | |
list(entity=crcExpr, wasExecuted=F), | |
list(url=thisFile, name="runAttractorInCRCSC.R", wasExecuted=T)))) | |
## SET ANNOTATIONS AND STORE RESULT BACK IN SYNAPSE | |
message("... storing correlation analysis back in Synapse ...") | |
synFile <- File(path=fileName, parentId=fq$folder.id[ fq$folder.name=="results" ], min=thisGene) | |
annotValue(synFile, "projectId") <- projectId | |
synFile <- synStore(synFile, activity=act) | |
synUrl <- paste("https://synapse.org/#!Synapse:", synFile@properties$id, sep="") | |
## SHOW THE NEWLY CREATED RESULT | |
message("... FINISHED ...") | |
message(paste("... go here to see your result:", synUrl, "...")) | |
return(invisible(synFile)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment