Last active
October 4, 2022 00:46
-
-
Save scbrown86/87a8c49117d0be7ea8730a6f4d17fa2c to your computer and use it in GitHub Desktop.
extract background points from hypervolumes and calculate AUC and Boyce index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##%######################################################%## | |
# # | |
#### Extracts probability estimates #### | |
#### from calibration/validation points #### | |
#### for n-dimensional hypervolumes, #### | |
#### and compares these to #### | |
#### background values from the same hypervolume. #### | |
#### Values are then rescaled to {0, 1} #### | |
#### based on 10% omission rate, with #### | |
#### upper values capped at 90%. AUC and #### | |
#### Boyce indices are then calculated on rescaled #### | |
#### data. Stuart C Brown - 2021-10-18 #### | |
# # | |
##%######################################################%## | |
# function for background probability | |
## essentially the same as hypervolume::hypervolume_estimate_probability() | |
## but removes rows/weights where distances are 0. | |
## This is necessary because randomly sampled background points will have dist of 0 to itself | |
## which then becomes Inf when weighted | |
## need to provide n.points and a random.seed for sampling background points | |
background_probability <- function(hv, n.points = 10000, weight.exponent = -1, | |
random.seed = 65, set.edges.zero = TRUE, | |
edges.zero.distance.factor = 1, verbose = TRUE) { | |
require(pbapply) | |
np <- nrow(hv@RandomPoints) | |
dimhv <- ncol(hv@RandomPoints) | |
numpointstokeep_hv <- floor(min(c(n.points, np))) | |
if (numpointstokeep_hv != n.points & verbose) { | |
warning(sprintf("There were less than %s background points available. Using %s points. See hv@RandomPoints.", n.points, np), | |
call. = FALSE, immediate. = TRUE, noBreaks. = TRUE) | |
} | |
## set random seed for reproducibility | |
## only an issue of n.points is less than the number of random points in the hv | |
{set.seed(random.seed); index_ss <- sample(1:np, numpointstokeep_hv, replace = FALSE)} | |
hv_points_ss <- hv@RandomPoints[index_ss, , drop = FALSE] | |
prob_ss <- hv@ValueAtRandomPoints[index_ss] | |
point_density <- nrow(hv_points_ss)/hv@Volume | |
cutoff_dist <- point_density^(-1/dimhv) | |
if (verbose) { | |
cat(sprintf("Retaining %d/%d hypervolume random points for comparison with %d test points.\n", | |
nrow(hv_points_ss), nrow(hv@RandomPoints), np)) | |
} | |
if (!verbose) { | |
pbapply::pboptions(type = "none") | |
} else { | |
pbapply::pboptions(type = "timer") | |
} | |
points <- as.matrix(hv_points_ss) | |
probabilities <- pbsapply(1:nrow(points), function(i) { | |
distances <- pdist::pdist(points[i, , drop = FALSE], hv_points_ss)@dist | |
# get indices for distance == 0. Self-distance. | |
dist0 <- which(distances == 0) | |
weights <- distances^weight.exponent | |
# exlude dist == 0 from results. Otherwise Inf. | |
result <- sum(prob_ss[-dist0] * weights[-dist0])/sum(weights[-dist0]) | |
if (set.edges.zero == TRUE) { | |
if (min(distances) > cutoff_dist * edges.zero.distance.factor) { | |
result <- 0 | |
} | |
} | |
return(result) | |
}, simplify = TRUE, USE.NAMES = FALSE) | |
return(probabilities) | |
} | |
TEST <- FALSE | |
if (TEST) { | |
library(data.table) | |
library(hypervolume) | |
library(scales) | |
library(raster) | |
library(rnaturalearth) | |
## enmSdm must must be installed from github - see below | |
if (!require(enmSdm)) { | |
library(remotes) | |
Sys.setenv(R_REMOTES_NO_ERRORS_FROM_WARNINGS = "true") | |
install_github("adamlilith/enmSdm", dependencies = TRUE) | |
install_github("adamlilith/omnibus") | |
library(enmSdm) | |
} else { | |
library(enmSdm) | |
} | |
# load in lat/lon data | |
data("quercus") | |
data_alba <- subset(quercus, Species == "Quercus alba")[, c("Longitude","Latitude")] | |
# get worldclim data from internet | |
climatelayers <- getData("worldclim", var = "bio", res = 10) | |
# z-transform climate layers to make axes comparable | |
climatelayers_ss <- climatelayers[[c(1,4,12,15)]] | |
climatelayers_ss <- scale(climatelayers_ss, center = TRUE, scale = TRUE) | |
climatelayers_ss_cropped <- aggregate(crop(climatelayers_ss, extent(-150,-50,15,60)), 3) | |
# extract transformed climate values | |
climate_alba <- extract(climatelayers_ss_cropped, data_alba) | |
{set.seed(54); valIDX <- sort(sample(1:nrow(climate_alba), 0.1 * nrow(climate_alba), replace = FALSE))} | |
# calibration set | |
climate_alba_cal <- climate_alba[-valIDX, ] | |
# validation set | |
climate_alba_val <- climate_alba[valIDX, ] | |
# compute Ggaussian hypervolume. | |
hv_alba <- hypervolume_gaussian(climate_alba_cal, name = "alba", samples.per.point = 10) | |
hv_alba | |
plot(hv_alba, | |
plot.function.additional = function(i, j) { | |
points(x = climate_alba_val[, i], y = climate_alba_val[, j], | |
col = "blue2", pch = 16)}, | |
show.legend = FALSE) | |
# % of val points in hv | |
round(sum(hypervolume_inclusion_test(hv_alba, points = climate_alba_val, verbose = FALSE, | |
fast.or.accurate = "accurate"))/nrow(climate_alba_val), 2)*100 | |
# probabilities | |
cal_hs <- hypervolume_estimate_probability(hv_alba, climate_alba_cal) # calibration points | |
val_hs <- hypervolume_estimate_probability(hv_alba, climate_alba_val) # validation points | |
# extract values from random points (equivalent to background points) | |
## see ?`Hypervolume-class` for details | |
## 10,000 or total (smaller or two) of background points sampled | |
bg_scores <- background_probability(hv_alba, n.points = 10000, random.seed = 20211018, | |
verbose = TRUE) | |
{par(mfrow = c(2,2)) | |
hist(cal_hs, breaks = 20) | |
abline(v = quantile(cal_hs, probs = c(0.10, 0.90)), col = "red", lty = 2) | |
hist(val_hs, breaks = 20) | |
abline(v = quantile(cal_hs, probs = c(0.10, 0.90)), col = "red", lty = 2) | |
hist(bg_scores, breaks = 20) | |
abline(v = quantile(cal_hs, probs = c(0.10, 0.90)), col = "red", lty = 2) | |
text(mean(cal_hs), 450, "red lines show\np10 and p90\nof cal_hs") | |
par(mfrow = c(1,1))} | |
# Apply p10 threshold to exlude 10% of training presences | |
## p10 is defined from calibration data | |
p10 <- quantile(cal_hs, 0.10) | |
p90 <- quantile(cal_hs, 0.90) | |
# Threshold. Anything <= p10 becomes NA, anything > p90 becomes p90 | |
## thresholded scores are then rescaled to {0, 1} | |
cal_hs[cal_hs <= p10] <- NA; cal_hs[cal_hs > p90] <- p90; cal_hs <- rescale(cal_hs, from = c(p10, p90), to = c(0, 1)) | |
val_hs[val_hs <= p10] <- NA; val_hs[val_hs > p90] <- p90; val_hs <- rescale(val_hs, from = c(p10, p90), to = c(0, 1)) | |
bg_scores[bg_scores <= p10] <- NA; bg_scores[bg_scores > p90] <- p90; bg_scores <- rescale(bg_scores, from = c(p10, p90), to = c(0, 1)) | |
{par(mfrow = c(2,2)) | |
hist(cal_hs, breaks = 20) | |
hist(val_hs, breaks = 20) | |
hist(bg_scores, breaks = 20) | |
par(mfrow = c(1,1))} | |
# AUC | |
aucWeighted(cal_hs, bg_scores, na.rm = TRUE) | |
aucWeighted(val_hs, bg_scores, na.rm = TRUE) | |
# Boyce | |
contBoyce(cal_hs, bg_scores, na.rm = TRUE, binWidth = 0.1) | |
contBoyce(val_hs, bg_scores, na.rm = TRUE, binWidth = 0.1) | |
contBoyce2x(cal_hs, bg_scores, na.rm = TRUE) | |
contBoyce2x(val_hs, bg_scores, na.rm = TRUE) | |
# Spatial projection | |
alba_map <- hypervolume_project(hv_alba, climatelayers_ss_cropped, set.edges.zero = TRUE) | |
plot(alba_map, col = hcl.colors(100, "Green-Yellow", rev = TRUE), | |
legend = TRUE, main = "Quercus alba [raw scale]", | |
sub = "Calibration points in blue; validation points in red", | |
addfun = function() { | |
lines(ne_coastline(110, "sp")) | |
points(data_alba[-valIDX, ], cex = 0.2, pch = 16, col = "#0000F068") | |
points(data_alba[valIDX, ], cex = 0.2, pch = 16, col = "#FF30305F") | |
}) | |
# Thresholded spatial projection | |
## p10 is very conservative | |
alba_map_thr <- alba_map | |
alba_map_thr[alba_map_thr <= p10] <- NA | |
alba_map_thr[alba_map_thr > p90] <- p90 | |
values(alba_map_thr) <- rescale(values(alba_map_thr), from = c(p10, p90), to = c(0, 1)) | |
plot(alba_map_thr, col = hcl.colors(100, "Green-Yellow", rev = TRUE), | |
legend = TRUE, main = "Quercus alba [rescaled]", | |
sub = "Calibration points in blue; validation points in red", | |
zlim = c(0, 1), | |
addfun = function() { | |
lines(ne_coastline(110, "sp")) | |
points(data_alba[-valIDX, ], cex = 0.2, pch = 16, col = "#0000F068") | |
points(data_alba[valIDX, ], cex = 0.2, pch = 16, col = "#FF30305F") | |
}) | |
# Thresholded spatial projection | |
## use minimum training presence | |
alba_map_mtp <- alba_map | |
mtp <- min(extract(alba_map, data_alba[-valIDX, ]), na.rm = TRUE) | |
maxtp <- max(values(alba_map_mtp), na.rm = TRUE) | |
alba_map_mtp[alba_map_mtp <= mtp] <- NA | |
alba_map_mtp <- raster::stretch(alba_map_mtp, minv = 0, maxv = 1, smin = mtp, smax = maxtp) | |
plot(alba_map_mtp, col = hcl.colors(100, "Green-Yellow", rev = TRUE), | |
legend = TRUE, main = "Quercus alba [rescaled mtp]", | |
sub = "Calibration points in blue; validation points in red", | |
zlim = c(0, 1), | |
addfun = function() { | |
lines(ne_coastline(110, "sp")) | |
points(data_alba[-valIDX, ], cex = 0.2, pch = 16, col = "#0000F068") | |
points(data_alba[valIDX, ], cex = 0.2, pch = 16, col = "#FF30305F") | |
}) | |
# Thresholded spatial projection | |
## use 9th percentile of all values > 0 as upper bound. 0 as lower. | |
alba_map_q95 <- alba_map | |
q95 <- quantile(alba_map_q95[alba_map_q95 > 0], 0.95, na.rm = TRUE) | |
alba_map_q95 <- raster::stretch(alba_map_q95, minv = 0, maxv = 1, smin = 0, smax = q95) | |
plot(alba_map_q95, col = hcl.colors(100, "Green-Yellow", rev = TRUE), | |
legend = TRUE, main = "Quercus alba [rescaled q95]", | |
sub = "Calibration points in blue; validation points in red", | |
zlim = c(0, 1), | |
addfun = function() { | |
lines(ne_coastline(110, "sp")) | |
points(data_alba[-valIDX, ], cex = 0.2, pch = 16, col = "#0000F068") | |
points(data_alba[valIDX, ], cex = 0.2, pch = 16, col = "#FF30305F") | |
}) | |
{par(mfrow = c(2,2)) | |
hist(extract(alba_map_thr, data_alba), breaks = 20, xlim = c(0, 1), main = "p10/p90") | |
hist(extract(alba_map_mtp, data_alba), breaks = 20, xlim = c(0, 1), main = "mtp") | |
hist(extract(alba_map_q95, data_alba), breaks = 20, xlim = c(0, 1), main = "q95") | |
par(mfrow = c(1,1))} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment