Last active
May 19, 2021 08:40
-
-
Save BlasBenito/768a45951a3d0c5ba355f52be94b05de to your computer and use it in GitHub Desktop.
Use of spatialRF::auto_cor() and spatialRF::auto_vif() to reduce multicollinearity in a set of predictors while taking into account a preference order defined by the user.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setup ----
# Install the development version of spatialRF from the "main" branch on
# GitHub. force = TRUE requests the installation even if the package is
# already present; quiet = TRUE silences the installation output.
remotes::install_github(
  repo = "blasbenito/spatialRF",
  ref = "main",
  force = TRUE,
  quiet = TRUE
)

library(spatialRF)
library(magrittr) # provides the %>% pipe used below

# Load the example data set (presumably shipped with spatialRF, which is
# attached above).
data(plant_richness_df)

# Data frame with predictors: columns 5 to 21 of plant_richness_df.
predictors.df <- plant_richness_df[, 5:21]
# 1. Manual preference order ----

# Preference order defined by the user. It is passed unchanged to both
# auto_cor() and auto_vif() below (see their documentation for how ties
# between collinear variables are resolved using this order).
preference.order <- c(
  "climate_bio1_average_X_bias_area_km2",
  "climate_aridity_index_average",
  "climate_hypervolume",
  "climate_bio1_average",
  "climate_bio15_minimum",
  "bias_area_km2"
)

# Reduce multicollinearity: first via Pearson correlation with auto_cor(),
# then via variance inflation factor with auto_vif(). The output of
# auto_cor() is piped in as the first argument of auto_vif().
variable.selection <- spatialRF::auto_cor(
  x = predictors.df,
  cor.threshold = 0.5,
  preference.order = preference.order
) %>%
  spatialRF::auto_vif(
    vif.threshold = 2.5,
    preference.order = preference.order
  )

# Selected variables.
variable.selection$selected.variables

# Same filters in the opposite order: auto_vif() first, then auto_cor().
# The original author notes that this does not change the result.
variable.selection <- spatialRF::auto_vif(
  x = predictors.df,
  vif.threshold = 2.5,
  preference.order = preference.order
) %>%
  spatialRF::auto_cor(
    cor.threshold = 0.5,
    preference.order = preference.order
  )

# Selected variables.
variable.selection$selected.variables
# 2. Automatic preference order ----

# Fit a model on all predictors to extract importance scores.
m <- spatialRF::rf(
  data = plant_richness_df,
  dependent.variable.name = "richness_species_vascular",
  predictor.variable.names = colnames(predictors.df),
  verbose = FALSE
)

# Plot the importance scores.
spatialRF::plot_importance(m)

# Use the variable names from the model's per-variable importance table as
# the preference order (the original author describes this as "in order of
# variable importance"; the sort direction is not visible here -- confirm
# against the spatialRF documentation).
preference.order <- m$importance$per.variable$variable

# Reduce multicollinearity with the importance-based preference order:
# auto_cor() first, then auto_vif() on its output.
variable.selection <- spatialRF::auto_cor(
  x = predictors.df,
  cor.threshold = 0.5,
  preference.order = preference.order
) %>%
  spatialRF::auto_vif(
    vif.threshold = 2.5,
    preference.order = preference.order
  )

# Selected variables.
variable.selection$selected.variables
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment