Created
December 7, 2023 12:59
-
-
Save MatteoLacki/21a831e00a8db206411e28efd30db0da to your computer and use it in GitHub Desktop.
test_downcentric_whatever.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("impute") | |
# simulating a matrix | |
peptideCnt = 1000 | |
runsCnt = 6 | |
expressions = matrix( | |
data=rnorm(peptideCnt*runsCnt, mean=1000, sd=400), | |
nrow=peptideCnt, | |
ncol=runsCnt | |
) | |
draw_missing_at_random_mask = function(expressions, prob = c(0.75, 0.25)) sample( | |
x = c(FALSE, TRUE), | |
size = nrow(expressions) * ncol(expressions), | |
prob = prob, | |
replace = TRUE | |
) | |
missing_at_random_mask = draw_missing_at_random_mask(expressions) | |
table(missing_at_random_mask) | |
expressions_with_mocked_NAs = expressions | |
expressions_with_mocked_NAs[missing_at_random_mask] = NA | |
expressions_with_mocked_NAs_imputed = impute.knn(expressions_with_mocked_NAs, k=10)$data | |
nrow(expressions_with_mocked_NAs_imputed) | |
ncol(expressions_with_mocked_NAs_imputed) | |
# values not needing imputation untouched? YES | |
all(expressions_with_mocked_NAs_imputed[!missing_at_random_mask] == expressions[!missing_at_random_mask]) | |
# errors: | |
err = expressions_with_mocked_NAs_imputed[missing_at_random_mask] - expressions[missing_at_random_mask] | |
hist(err, n=100) | |
# then: comparison of 2 sample conditions. | |
cond_A = matrix( | |
data=rnorm(peptideCnt*runsCnt, mean=1000, sd=100), | |
nrow=peptideCnt, | |
ncol=runsCnt | |
) | |
cond_B = matrix( | |
data=rnorm(peptideCnt*runsCnt, mean=1200, sd=100), | |
nrow=peptideCnt, | |
ncol=runsCnt | |
) | |
# do t.tests | |
do_tests = function(cond_A, cond_B){ | |
if( nrow(cond_A) == nrow(cond_B) ){ | |
pvalues = numeric(nrow(cond_A)) | |
for( i in 1:nrow(cond_A)){ | |
test_result = t.test( cond_A[i,], cond_B[i,] ) | |
pvalues[i] = test_result$p.value | |
} | |
} | |
pvalues | |
} | |
p_values = do_tests(cond_A, cond_B) | |
adj_p_values = p.adjust(p_values, method="fdr") | |
plot(p_values[p_values< 0.1], adj_p_values[p_values< 0.1], pch=".") | |
abline(coef = c(0,1)) | |
abline(h=0.01) | |
abline(v=0.01) | |
all_discoveries_mask = adj_p_values < 0.01 | |
cond_A_mocked_NAs = cond_A | |
cond_A_mock_mask = draw_missing_at_random_mask(cond_A) | |
cond_A_mocked_NAs[cond_A_mock_mask] = NA | |
cond_A_mocked_NAs_imputed = impute.knn(cond_A_mocked_NAs, k=10)$data | |
cond_B_mocked_NAs = cond_B | |
cond_B_mock_mask = draw_missing_at_random_mask(cond_B) | |
cond_B_mocked_NAs[cond_B_mock_mask] = NA | |
cond_B_mocked_NAs_imputed = impute.knn(cond_B_mocked_NAs, k=10)$data | |
imputed_p_values = do_tests(cond_A_mocked_NAs_imputed, cond_B_mocked_NAs_imputed) | |
adj_imputed_p_values = p.adjust(imputed_p_values, method="fdr") | |
all_imputed_discoveries_mask = adj_imputed_p_values < 0.01 | |
table(all_discoveries_mask, all_imputed_discoveries_mask) | |
# should the values be imputed within groups? Which groups? Likely technical replicates. | |
# what is t.test doing with missing values as such? | |
?t.test | |
getOption("na.action") # so simply omits them |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment