Created
June 25, 2016 23:40
-
-
Save klainfo/baabe7664a392a057e9c8fb5ce4bd64a to your computer and use it in GitHub Desktop.
Example Code for the Out-of-Sample Bootstrap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(DefectData)
library(pROC)

# Load the "eclipse-2.0" dataset. The object returned by loadData() is
# indexed below for three components:
#   data  - the data frame that is modelled later in the script
#   dep   - the name of the outcome column (used as the formula's left side)
#   indep - the names of the predictor columns (used as the formula's right side)
# `[[` is used instead of `$` so that component names are matched exactly
# rather than by partial matching.
Data <- loadData("eclipse-2.0")
data <- Data[["data"]]
dep <- Data[["dep"]]
indep <- Data[["indep"]]
# Normality Correction | |
#' Log-transform a numeric vector to reduce skew.
#'
#' Uses log1p(), i.e. log(1 + y), so that zero values map to zero instead
#' of -Inf.
#'
#' @param y A numeric vector.
#' @return A numeric vector the same length as `y` holding log(1 + y).
transformLog <- function(y) {
  # Return the value directly; assigning it to `y` first only made the
  # result invisible without changing it.
  log1p(y)
}
# Apply the log transformation column by column. lapply() keeps each column
# as its own vector (apply() would first coerce the data frame to a matrix),
# and drop = FALSE keeps the subset a data frame even when `indep` names a
# single column.
data[, indep] <- lapply(data[, indep, drop = FALSE], transformLog)

# Recode the outcome as a two-level factor with labels "F"/"T". The level
# order is stated explicitly so that "F" is always the reference level,
# which makes glm(family = "binomial") model the probability of "T".
data[, dep] <- factor(
  ifelse(data[, dep] == "TRUE", "T", "F"),
  levels = c("F", "T")
)
# Out-of-sample bootstrap: in every iteration a model is fitted on a
# bootstrap sample and evaluated on the rows that the sample left out.
# The reported figure is the mean AUC over all iterations.
n_boot <- 100L

# The model formula does not depend on the bootstrap sample, so build it once.
f <- as.formula(paste0(dep, " ~ ", paste0(indep, collapse = " + ")))

# Preallocate the result vector instead of growing it inside the loop.
performance <- numeric(n_boot)

for (i in seq_len(n_boot)) {
  # Generate a bootstrap sample with replacement
  indices <- sample(nrow(data), replace = TRUE)

  # Generate training dataset using a bootstrap sample
  training <- data[indices, ]

  # Generate testing dataset (i.e., instances that
  # are not included in the bootstrap sample)
  testing <- data[-unique(indices), ]

  # Fit a prediction model using a logistic regression model
  m <- glm(f, data = training, family = "binomial")

  # Extract probabilities using the testing dataset
  prob <- predict(m, testing, type = "response")

  # Compute AUC performance. The class levels and the comparison direction
  # are fixed: "F" is the control class, "T" is the case class, and a higher
  # predicted probability indicates "T". Letting pROC choose the direction
  # per sample would score a worse-than-chance ranking above 0.5 and bias
  # the bootstrap average upward.
  performance[i] <- as.numeric(
    auc(testing[, dep], prob, levels = c("F", "T"), direction = "<")
  )
}

# Report the average AUC performance
mean(performance)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment