MatteoLacki · May 5, 2020 11:08
diff --git a/Renauld.R b/Renauld.R
 # hello Ute, long time no see.
 library(LFQBench2)
 library(data.table)
 library(readxl)
 library(stringr)
 library(rstatix)
 library(readr)
 # reading in the report
 R = read_wide_report('2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx',
                     skip=1, sheet="TOP3 quantification")

 # header = read_excel("2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx", 
 #                     sheet="TOP3 quantification"
 #                       "L1:", col_names = FALSE)
 PD = as.data.table(read_csv("projectDetails.csv"))
 colnames(PD) = make.names(colnames(PD))
 exp = as.data.table(str_split_fixed(PD$replicate_name, ' ', 2))
 colnames(exp) = c('state', 'replicate')
 PD = cbind(PD, exp)
 rm(exp)
 PD = PD[state != 'metastasis']

 uteTtests = as.data.table(read_csv("ute_t_tests.csv"))

 # subselect columns:
 normal_cols = grep("normal ", names(R), value = TRUE)
 tumor_cols  = grep("tumor ", names(R), value = TRUE)
 R_info = R[,description:entry]
 all(table(R_info$entry) == 1) # entry uniquely describes each protein and so is a key
 I_w = R[, c('entry', normal_cols, tumor_cols), with = FALSE]
 I = melt(I_w, id.vars='entry', variable.name ='exp', value.name = 'I')
 exp = as.data.table(str_split_fixed(I$exp, ' ', 2))
 colnames(exp) = c('state', 'replicate')
 I = cbind(I, exp)
 rm(exp)


 I_small = I[entry %in% c('IGK_HUMAN', 'IGLC2_HUMAN', 'IGL1_HUMAN')]

 # redoing Ute's analysis
 I_small[, .(uteTtest=t.test(I~state, data=.SD)$p.value), entry]
 # this should be done with t.test(I~state+entry, data=I_small), but it does not work.

 table(I[, uniqueN(state), entry]$V1)

 pvalsUte = sapply(split(I, I$entry), function(x) try(t.test(data=x, I~state$pvalue)))

 is.error <- function(x) inherits(x, "try-error")


 robustTtest = function(data, formula){
  r = try(t.test(formula, data), silent=TRUE)
  if(is.error(r)) return(-1)
  else return(r$p.value)
 }
 UteTest = function(data, formula){
  r = try(t.test(formula, data), silent=TRUE, na.action)
  if(is.error(r)){
    X = data[,.(NAcnt = sum(is.na(I))), state]
    if(any(X$NAcnt > 30)) return(0) else return(1)
  }
  else return(r$p.value)
 }

 res = I[,.(uteTtest=UteTest(.SD, I~state)), entry]
 res = uteTtests[res,on='entry']
 res[, real_ute_t_test := ifelse(real_ute_t_test==9999,1,real_ute_t_test)]

 hist(res$real_ute_t_test - res$uteTtest,
     breaks=1000, 
     main='Excel vs R',
     xlab='Excel.pvalue - R.pvalue')


 res[, uteTtestFDR := p.adjust(uteTtest, 'fdr')]
 hist(res$uteTtestFDR, breaks=100, xlab="Ute's T-test FDR adjusted", main='')

 I[, exp:=as.character(exp)]
 I = merge(I, PD[,.(replicate_name, Patient.No)], by.x = 'exp', by.y='replicate_name')

 bigTest = I[, .(t=UteTest(.SD, I~state)), .(entry, Patient.No)]
 bigTest[, tFDR:=p.adjust(t, 'fdr')]

 bigTest[,`:=`(pat_t = paste0('t_', Patient.No),
              pat_t_FDR=paste0('tFDR_', Patient.No) )]
 # hist(bigTest[uteTtest <= 1]$uteTtest, breaks=100)
 res = res[merge(dcast(bigTest, entry~pat_t, value.var='t'),
                dcast(bigTest, entry~pat_t_FDR, value.var='tFDR'),
                by='entry'),
          on='entry']

 res = R_info[res, on='entry']
 write.csv(res, file='results.csv')
	# hello Ute, long time no see.
	library(LFQBench2)
	library(data.table)
	library(readxl)
	library(stringr)
	library(rstatix)
	library(readr)
	# reading in the report
	R = read_wide_report('2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx',
	skip=1, sheet="TOP3 quantification")

	# header = read_excel("2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx",
	# sheet="TOP3 quantification"
	# "L1:", col_names = FALSE)
	PD = as.data.table(read_csv("projectDetails.csv"))
	colnames(PD) = make.names(colnames(PD))
	exp = as.data.table(str_split_fixed(PD$replicate_name, ' ', 2))
	colnames(exp) = c('state', 'replicate')
	PD = cbind(PD, exp)
	rm(exp)
	PD = PD[state != 'metastasis']

	uteTtests = as.data.table(read_csv("ute_t_tests.csv"))

	# subselect columns:
	normal_cols = grep("normal ", names(R), value = TRUE)
	tumor_cols = grep("tumor ", names(R), value = TRUE)
	R_info = R[,description:entry]
	all(table(R_info$entry) == 1) # entry uniquely describes each protein and so is a key
	I_w = R[, c('entry', normal_cols, tumor_cols), with = FALSE]
	I = melt(I_w, id.vars='entry', variable.name ='exp', value.name = 'I')
	exp = as.data.table(str_split_fixed(I$exp, ' ', 2))
	colnames(exp) = c('state', 'replicate')
	I = cbind(I, exp)
	rm(exp)


	I_small = I[entry %in% c('IGK_HUMAN', 'IGLC2_HUMAN', 'IGL1_HUMAN')]

	# redoing Ute's analysis
	I_small[, .(uteTtest=t.test(I~state, data=.SD)$p.value), entry]
	# this should be done with t.test(I~state+entry, data=I_small), but it does not work.

	table(I[, uniqueN(state), entry]$V1)

	pvalsUte = sapply(split(I, I$entry), function(x) try(t.test(data=x, I~state$pvalue)))

	is.error <- function(x) inherits(x, "try-error")


	robustTtest = function(data, formula){
	r = try(t.test(formula, data), silent=TRUE)
	if(is.error(r)) return(-1)
	else return(r$p.value)
	}
	UteTest = function(data, formula){
	r = try(t.test(formula, data), silent=TRUE, na.action)
	if(is.error(r)){
	X = data[,.(NAcnt = sum(is.na(I))), state]
	if(any(X$NAcnt > 30)) return(0) else return(1)
	}
	else return(r$p.value)
	}

	res = I[,.(uteTtest=UteTest(.SD, I~state)), entry]
	res = uteTtests[res,on='entry']
	res[, real_ute_t_test := ifelse(real_ute_t_test==9999,1,real_ute_t_test)]

	hist(res$real_ute_t_test - res$uteTtest,
	breaks=1000,
	main='Excel vs R',
	xlab='Excel.pvalue - R.pvalue')


	res[, uteTtestFDR := p.adjust(uteTtest, 'fdr')]
	hist(res$uteTtestFDR, breaks=100, xlab="Ute's T-test FDR adjusted", main='')

	I[, exp:=as.character(exp)]
	I = merge(I, PD[,.(replicate_name, Patient.No)], by.x = 'exp', by.y='replicate_name')

	bigTest = I[, .(t=UteTest(.SD, I~state)), .(entry, Patient.No)]
	bigTest[, tFDR:=p.adjust(t, 'fdr')]

	bigTest[,`:=`(pat_t = paste0('t_', Patient.No),
	pat_t_FDR=paste0('tFDR_', Patient.No) )]
	# hist(bigTest[uteTtest <= 1]$uteTtest, breaks=100)
	res = res[merge(dcast(bigTest, entry~pat_t, value.var='t'),
	dcast(bigTest, entry~pat_t_FDR, value.var='tFDR'),
	by='entry'),
	on='entry']

	res = R_info[res, on='entry']
	write.csv(res, file='results.csv')