Last active
February 24, 2024 21:23
-
-
Save lwaldron/edea48dfda3c9db34b80a326f50fc5d1 to your computer and use it in GitHub Desktop.
Select some UniRef IDs from curatedMetagenomicData studies, join, write to file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
suppressPackageStartupMessages({ | |
library(curatedMetagenomicData) | |
library(mia) | |
library(dplyr) | |
library(purrr) | |
}) | |
datasets <- sampleMetadata |> | |
group_by(study_name) |> | |
count() |> | |
# filter(n < 25) |> # Remove datasets with 25+ samples as an example - comment this line for all datasets | |
arrange(desc(-n)) | |
datasets | |
(dsnames <- paste0(pull(datasets, study_name), ".gene_families")) | |
# Gene families of interest, including contributions from all species | |
families <- "UniRef90_A0A1D7PV13|UniRef90_A0A377VU77" | |
# Or to avoid the species-specific contributions, add a "$" after each UniRef ID: | |
# families <- "UniRef90_A0A1D7PV13$|UniRef90_A0A377VU77$" | |
# create a full-length list | |
res <- vector("list", length(dsnames)) | |
names(res) <- dsnames | |
for (i in seq_along(dsnames)){ | |
ds = dsnames[i] | |
if(file.exists(paste0(ds, ".rds"))){ | |
message("loading ", ds, " from file, n = ", datasets[i, "n"]) | |
res[[ds]] <- readRDS(paste0(ds, ".rds")) | |
}else{ | |
message("initializing ", ds, " from file, n = ", datasets[i, "n"]) | |
gf1 = curatedMetagenomicData(ds, dryrun = FALSE)[[1]] | |
gf1 <- gf1[grep(families, rownames(gf1)), ] | |
saveRDS(gf1, file = paste0(ds, ".rds")) | |
res[[ds]] <- gf1 | |
} | |
} | |
# Combine all SummarizedExperiments into a single SummarizedExperiment | |
isnull <- unlist(purrr::map(res, is.null)) | |
res <- res[!isnull] | |
hasrows <- unlist(purrr::map(res, nrow)) > 0 | |
res <- res[hasrows] | |
# Need to convert to ordinary matrix for join to work | |
for (i in 1:length(res)){ | |
assay(res[[i]]) <- as.matrix(assay(res[[i]])) | |
} | |
allres <- mia::mergeSEs(res, assay.type = "gene_families") | |
assay(allres)[is.na(assay(allres))] = 0.0 | |
write.csv(assay(allres), file = "gene_families.csv") | |
write.csv(colData(allres), file = "gene_families_metadata.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment