Created
May 5, 2020 11:08
-
-
Save MatteoLacki/a51446147901b0b008e493bb90908e0d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# hello Ute, long time no see. | |
library(LFQBench2) | |
library(data.table) | |
library(readxl) | |
library(stringr) | |
library(rstatix) | |
library(readr) | |
# reading in the report | |
R = read_wide_report('2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx', | |
skip=1, sheet="TOP3 quantification") | |
# header = read_excel("2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx", | |
# sheet="TOP3 quantification" | |
# "L1:", col_names = FALSE) | |
PD = as.data.table(read_csv("projectDetails.csv")) | |
colnames(PD) = make.names(colnames(PD)) | |
exp = as.data.table(str_split_fixed(PD$replicate_name, ' ', 2)) | |
colnames(exp) = c('state', 'replicate') | |
PD = cbind(PD, exp) | |
rm(exp) | |
PD = PD[state != 'metastasis'] | |
uteTtests = as.data.table(read_csv("ute_t_tests.csv")) | |
# subselect columns: | |
normal_cols = grep("normal ", names(R), value = TRUE) | |
tumor_cols = grep("tumor ", names(R), value = TRUE) | |
R_info = R[,description:entry] | |
all(table(R_info$entry) == 1) # entry uniquely describes each protein and so is a key | |
I_w = R[, c('entry', normal_cols, tumor_cols), with = FALSE] | |
I = melt(I_w, id.vars='entry', variable.name ='exp', value.name = 'I') | |
exp = as.data.table(str_split_fixed(I$exp, ' ', 2)) | |
colnames(exp) = c('state', 'replicate') | |
I = cbind(I, exp) | |
rm(exp) | |
I_small = I[entry %in% c('IGK_HUMAN', 'IGLC2_HUMAN', 'IGL1_HUMAN')] | |
# redoing Ute's analysis | |
I_small[, .(uteTtest=t.test(I~state, data=.SD)$p.value), entry] | |
# this should be done with t.test(I~state+entry, data=I_small), but it does not work. | |
table(I[, uniqueN(state), entry]$V1) | |
pvalsUte = sapply(split(I, I$entry), function(x) try(t.test(data=x, I~state$pvalue))) | |
is.error <- function(x) inherits(x, "try-error") | |
robustTtest = function(data, formula){ | |
r = try(t.test(formula, data), silent=TRUE) | |
if(is.error(r)) return(-1) | |
else return(r$p.value) | |
} | |
UteTest = function(data, formula){ | |
r = try(t.test(formula, data), silent=TRUE, na.action) | |
if(is.error(r)){ | |
X = data[,.(NAcnt = sum(is.na(I))), state] | |
if(any(X$NAcnt > 30)) return(0) else return(1) | |
} | |
else return(r$p.value) | |
} | |
res = I[,.(uteTtest=UteTest(.SD, I~state)), entry] | |
res = uteTtests[res,on='entry'] | |
res[, real_ute_t_test := ifelse(real_ute_t_test==9999,1,real_ute_t_test)] | |
hist(res$real_ute_t_test - res$uteTtest, | |
breaks=1000, | |
main='Excel vs R', | |
xlab='Excel.pvalue - R.pvalue') | |
res[, uteTtestFDR := p.adjust(uteTtest, 'fdr')] | |
hist(res$uteTtestFDR, breaks=100, xlab="Ute's T-test FDR adjusted", main='') | |
I[, exp:=as.character(exp)] | |
I = merge(I, PD[,.(replicate_name, Patient.No)], by.x = 'exp', by.y='replicate_name') | |
bigTest = I[, .(t=UteTest(.SD, I~state)), .(entry, Patient.No)] | |
bigTest[, tFDR:=p.adjust(t, 'fdr')] | |
bigTest[,`:=`(pat_t = paste0('t_', Patient.No), | |
pat_t_FDR=paste0('tFDR_', Patient.No) )] | |
# hist(bigTest[uteTtest <= 1]$uteTtest, breaks=100) | |
res = res[merge(dcast(bigTest, entry~pat_t, value.var='t'), | |
dcast(bigTest, entry~pat_t_FDR, value.var='tFDR'), | |
by='entry'), | |
on='entry'] | |
res = R_info[res, on='entry'] | |
write.csv(res, file='results.csv') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment