Last active
December 4, 2020 18:10
-
-
Save MirzaCengic/fd5a5347f7237e8ad37494c4439b93f1 to your computer and use it in GitHub Desktop.
ensemble_modeling2.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cat("BIOMOD_EnsembleModeling2 loaded", "\n") | |
BIOMOD_EnsembleModeling2 <- function (modeling.output, chosen.models = "all", em.by = "PA_dataset+repet", | |
eval.metric = "all", eval.metric.quality.threshold = NULL, | |
models.eval.meth = c("KAPPA", "TSS", "ROC"), prob.mean = TRUE, | |
prob.cv = FALSE, prob.ci = FALSE, prob.ci.alpha = 0.05, prob.median = FALSE, | |
committee.averaging = FALSE, prob.mean.weight = FALSE, prob.mean.weight.decay = "proportional", | |
VarImport = 0) | |
{ | |
biomod2:::.bmCat("Build Ensemble Models") | |
args <- biomod2:::.BIOMOD_EnsembleModeling.check.args(modeling.output, | |
chosen.models, eval.metric, eval.metric.quality.threshold, | |
models.eval.meth, prob.mean, prob.cv, prob.ci, prob.ci.alpha, | |
prob.median, committee.averaging, prob.mean.weight, prob.mean.weight.decay, | |
em.by) | |
modeling.output <- args$modeling.output | |
chosen.models <- args$chosen.models | |
eval.metric <- args$eval.metric | |
eval.metric.quality.threshold <- args$eval.metric.quality.threshold | |
models.eval.meth <- args$models.eval.meth | |
prob.mean <- args$prob.mean | |
prob.cv <- args$prob.cv | |
prob.ci <- args$prob.ci | |
prob.ci.alpha <- args$prob.ci.alpha | |
prob.median <- args$prob.median | |
committee.averaging <- args$committee.averaging | |
prob.mean.weight <- args$prob.mean.weight | |
prob.mean.weight.decay <- args$prob.mean.weight.decay | |
em.by <- args$em.by | |
rm("args") | |
em.avail <- c("prob.mean", "prob.cv", "prob.ci.inf", "prob.ci.sup", | |
"prob.median", "committee.averaging", "prob.mean.weight") | |
em.algo <- em.avail[c(prob.mean, prob.cv, prob.ci, prob.ci, | |
prob.median, committee.averaging, prob.mean.weight)] | |
Options <- list(em.by = em.by) | |
expl_var_type = get_var_type(get_formal_data(modeling.output, | |
"expl.var")) | |
expl_var_range = get_var_range(get_formal_data(modeling.output, | |
"expl.var")) | |
EM <- new("BIOMOD.EnsembleModeling.out", sp.name = modeling.output@sp.name, | |
expl.var.names = modeling.output@expl.var.names, em.by = em.by, | |
modeling.id = modeling.output@modeling.id) | |
EM@models.out.obj@link <- file.path(modeling.output@sp.name, | |
paste(modeling.output@sp.name, ".", modeling.output@modeling.id, | |
".models.out", sep = "")) | |
em.mod.assemb <- biomod2:::.em.models.assembling(chosen.models, em.by) | |
for (assemb in names(em.mod.assemb)) { | |
cat("\n\n >", assemb, "ensemble modeling") | |
models.kept <- em.mod.assemb[[assemb]] | |
if (modeling.output@has.evaluation.data) { | |
eval.obs <- get_formal_data(modeling.output, "eval.resp.var") | |
eval.expl <- get_formal_data(modeling.output, "eval.expl.var") | |
} | |
obs <- get_formal_data(modeling.output, "resp.var") | |
expl <- get_formal_data(modeling.output, "expl.var") | |
if (em.by %in% c("PA_dataset", "PA_dataset+algo", "PA_dataset+repet")) { | |
if (unlist(strsplit(assemb, "_"))[3] != "AllData") { | |
if (inherits(get_formal_data(modeling.output), | |
"BIOMOD.formated.data.PA")) { | |
kept_cells <- get_formal_data(modeling.output)@PA[, | |
unlist(strsplit(assemb, "_"))[3]] | |
} | |
else { | |
kept_cells <- rep(T, length(obs)) | |
} | |
obs <- obs[kept_cells] | |
expl <- expl[kept_cells, , drop = F] | |
} | |
} | |
obs[is.na(obs)] <- 0 | |
needed_predictions <- biomod2:::.get_needed_predictions(modeling.output, | |
em.by, models.kept, eval.metric, eval.metric.quality.threshold) | |
if (!length(needed_predictions)) | |
next | |
for (eval.m in eval.metric) { | |
models.kept <- needed_predictions$models.kept[[eval.m]] | |
models.kept.scores <- needed_predictions$models.kept.scores[[eval.m]] | |
for (algo in em.algo) { | |
if (algo == "prob.mean") { | |
cat("\n > Mean of probabilities...") | |
model_name <- paste(modeling.output@sp.name, | |
"_", "EMmeanBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMmean_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMmean", | |
model_options = Options, resp_name = modeling.output@sp.name, | |
expl_var_names = modeling.output@expl.var.names, | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = modeling.output@modeling.id) | |
} | |
if (algo == "prob.cv") { | |
cat("\n > Coef of variation of probabilities...") | |
model_name <- paste(modeling.output@sp.name, | |
"_", "EMcvBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMcv_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMcv", | |
model_options = Options, resp_name = modeling.output@sp.name, | |
expl_var_names = modeling.output@expl.var.names, | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = modeling.output@modeling.id) | |
} | |
if (algo == "prob.median") { | |
cat("\n > Median of probabilities...") | |
model_name <- paste(modeling.output@sp.name, | |
"_", "EMmedianBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMmedian_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMmedian", | |
model_options = Options, resp_name = modeling.output@sp.name, | |
expl_var_names = modeling.output@expl.var.names, | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = modeling.output@modeling.id) | |
} | |
if (algo == "prob.ci.inf") { | |
cat("\n > Confidence Interval...") | |
model_name <- paste(modeling.output@sp.name, | |
"_", "EMciInfBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMci_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMci", | |
model_options = Options, resp_name = modeling.output@sp.name, | |
expl_var_names = modeling.output@expl.var.names, | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = modeling.output@modeling.id, | |
alpha = prob.ci.alpha, side = "inferior") | |
} | |
if (algo == "prob.ci.sup") { | |
model_name <- paste(modeling.output@sp.name, | |
"_", "EMciSupBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMci_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMci", | |
model_options = Options, resp_name = modeling.output@sp.name, | |
expl_var_names = modeling.output@expl.var.names, | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = modeling.output@modeling.id, | |
alpha = prob.ci.alpha, side = "superior") | |
} | |
if (algo == "committee.averaging") { | |
cat("\n > Committee averaging...") | |
model_name <- paste(modeling.output@sp.name, | |
"_", "EMcaBy", eval.m, "_", assemb, sep = "") | |
models.kept.tresh <- unlist(lapply(models.kept, | |
function(x) { | |
mod <- tail(unlist(strsplit(x, "_")), 3)[3] | |
run <- tail(unlist(strsplit(x, "_")), 3)[2] | |
dat <- tail(unlist(strsplit(x, "_")), 3)[1] | |
return(get_evaluations(modeling.output)[eval.m, | |
"Cutoff", mod, run, dat]) | |
})) | |
names(models.kept.tresh) <- models.kept | |
to_keep <- is.finite(models.kept.tresh) | |
model.bm <- new("EMca_biomod2_model", model = models.kept[to_keep], | |
model_name = model_name, model_class = "EMca", | |
model_options = Options, resp_name = modeling.output@sp.name, | |
expl_var_names = modeling.output@expl.var.names, | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = modeling.output@modeling.id, | |
tresholds = models.kept.tresh[to_keep]) | |
} | |
if (algo == "prob.mean.weight") { | |
cat("\n > Probabilities weighting mean...") | |
model_name <- paste(modeling.output@sp.name, | |
"_", "EMwmeanBy", eval.m, "_", assemb, sep = "") | |
models.kept.tmp <- models.kept | |
models.kept.scores.tmp <- models.kept.scores | |
if (eval.m == "ROC") { | |
sre.id <- grep("_SRE", models.kept) | |
if (length(sre.id) > 0) { | |
cat("\n ! SRE modeling were switched off") | |
models.kept.tmp <- models.kept[-sre.id] | |
models.kept.scores.tmp <- models.kept.scores[-sre.id] | |
} | |
} | |
models.kept.tmp <- models.kept.tmp[is.finite(models.kept.scores.tmp)] | |
models.kept.scores.tmp <- models.kept.scores.tmp[is.finite(models.kept.scores.tmp)] | |
models.kept.scores.tmp <- round(models.kept.scores.tmp, | |
3) | |
cat("\n\t\t", " original models scores = ", | |
models.kept.scores.tmp) | |
if (is.numeric(prob.mean.weight.decay)) { | |
DecayCount <- sum(models.kept.scores.tmp > | |
0) | |
WOrder <- order(models.kept.scores.tmp, decreasing = T) | |
Dweights <- models.kept.scores.tmp | |
for (J in 1:DecayCount) Dweights[WOrder[J]] <- I(prob.mean.weight.decay^(DecayCount - | |
J + 1)) | |
for (J in 1:length(models.kept.scores.tmp)) { | |
if (sum(models.kept.scores.tmp[J] == models.kept.scores.tmp) > | |
1) | |
Dweights[which(models.kept.scores.tmp[J] == | |
models.kept.scores.tmp)] <- mean(Dweights[which(models.kept.scores.tmp[J] == | |
models.kept.scores.tmp)]) | |
} | |
models.kept.scores.tmp <- round(Dweights, | |
digits = 3) | |
rm(list = c("Dweights", "DecayCount", "WOrder")) | |
} | |
else if (is.function(prob.mean.weight.decay)) { | |
models.kept.scores.tmp <- sapply(models.kept.scores.tmp, | |
prob.mean.weight.decay) | |
} | |
models.kept.scores.tmp <- round(models.kept.scores.tmp/sum(models.kept.scores.tmp, | |
na.rm = T), digits = 3) | |
cat("\n\t\t", " final models weights = ", models.kept.scores.tmp) | |
model.bm <- new("EMwmean_biomod2_model", model = models.kept.tmp, | |
model_name = model_name, model_class = "EMwmean", | |
model_options = Options, resp_name = modeling.output@sp.name, | |
expl_var_names = modeling.output@expl.var.names, | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = modeling.output@modeling.id, | |
penalization_scores = models.kept.scores.tmp) | |
} | |
pred.bm <- predict(model.bm, expl, formal_predictions = needed_predictions$predictions[, | |
model.bm@model, drop = F], on_0_1000 = T) | |
pred.bm.name <- paste0(model_name, ".predictions") | |
pred.bm.outfile <- file.path(model.bm@resp_name, | |
".BIOMOD_DATA", model.bm@modeling.id, "ensemble.models", | |
"ensemble.models.predictions", pred.bm.name) | |
dir.create(dirname(pred.bm.outfile), showWarnings = FALSE, | |
recursive = TRUE) | |
assign(pred.bm.name, pred.bm) | |
save(list = pred.bm.name, file = pred.bm.outfile, | |
compress = TRUE) | |
rm(list = pred.bm.name) | |
if (exists("eval.obs") & exists("eval.expl")) { | |
eval_pred.bm <- predict(model.bm, eval.expl) | |
pred.bm.name <- paste0(model_name, ".predictionsEval") | |
assign(pred.bm.name, eval_pred.bm) | |
save(list = pred.bm.name, file = pred.bm.outfile, | |
compress = TRUE) | |
rm(list = pred.bm.name) | |
} | |
if (length(models.eval.meth)) { | |
cat("\n\t\t\tEvaluating Model stuff...") | |
if (algo == "prob.cv") { | |
cross.validation <- matrix(NA, 4, length(models.eval.meth), | |
dimnames = list(c("Testing.data", "Cutoff", | |
"Sensitivity", "Specificity"), models.eval.meth)) | |
} | |
else { | |
if (em.by == "PA_dataset+repet") { | |
calib_lines <- get_calib_lines(modeling.output) | |
pa_dataset_id <- paste("_", unlist(strsplit(assemb, | |
"_"))[3], sep = "") | |
repet_id <- paste("_", unlist(strsplit(assemb, | |
"_"))[2], sep = "") | |
if (repet_id == "_Full") { | |
eval_lines <- rep(T, length(pred.bm)) | |
} | |
else { | |
eval_lines <- !na.omit(calib_lines[, | |
repet_id, pa_dataset_id]) | |
if (all(!eval_lines)) { | |
eval_lines <- !eval_lines | |
} | |
} | |
} | |
else { | |
eval_lines <- rep(T, length(pred.bm)) | |
} | |
cross.validation <- sapply(models.eval.meth, | |
Find.Optim.Stat, Fit = pred.bm[eval_lines], | |
Obs = obs[eval_lines]) | |
rownames(cross.validation) <- c("Testing.data", | |
"Cutoff", "Sensitivity", "Specificity") | |
} | |
if (exists("eval_pred.bm")) { | |
if (algo == "prob.cv") { | |
cross.validation <- matrix(NA, 5, length(models.eval.meth), | |
dimnames = list(c("Testing.data", "Evaluating.data", | |
"Cutoff", "Sensitivity", "Specificity"), | |
models.eval.meth)) | |
} | |
else { | |
true.evaluation <- sapply(models.eval.meth, | |
function(x) { | |
Find.Optim.Stat(Fit = eval_pred.bm * | |
1000, Obs = eval.obs, Fixed.thresh = cross.validation["Cutoff", | |
x]) | |
}) | |
cross.validation <- rbind(cross.validation["Testing.data", | |
], true.evaluation) | |
rownames(cross.validation) <- c("Testing.data", | |
"Evaluating.data", "Cutoff", "Sensitivity", | |
"Specificity") | |
} | |
} | |
cross.validation <- t(round(cross.validation, | |
digits = 3)) | |
model.bm@model_evaluation <- cross.validation | |
rm(list = c("cross.validation")) | |
} | |
if (VarImport > 0) { | |
cat("\n\t\t\tEvaluating Predictor Contributions...", | |
"\n") | |
variables.importance <- variables_importance(model.bm, | |
expl, nb_rand = VarImport) | |
model.bm@model_variables_importance <- variables.importance$mat | |
rm(list = c("variables.importance")) | |
} | |
assign(model_name, model.bm) | |
save(list = model_name, file = file.path(modeling.output@sp.name, | |
"models", modeling.output@modeling.id, model_name)) | |
EM@em.models <- c(EM@em.models, model.bm) | |
EM@em.computed <- c(EM@em.computed, model_name) | |
} | |
} | |
} | |
names(EM@em.models) <- EM@em.computed | |
model.name <- paste(EM@sp.name, ".", EM@modeling.id, "ensemble.models.out", | |
sep = "") | |
assign(x = model.name, value = EM) | |
save(list = model.name, file = file.path(EM@sp.name, model.name)) | |
biomod2:::.bmCat("Done") | |
return(EM) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment