klainfo · June 25, 2016 23:40
diff --git a/out-of-sample-bootstrap.R b/out-of-sample-bootstrap.R
 library(DefectData)
 library(pROC)
 # Load the "eclipse-2.0" dataset. The returned object supplies the data
 # frame (`data`), the name of the response column (`dep`) and the names of
 # the predictor columns (`indep`) that the rest of the script works with.
 Data <- loadData("eclipse-2.0")
 data <- Data$data
 dep <- Data$dep
 indep <- Data$indep

 # Normality correction: log(1 + y) transform of a predictor.
 # Returns the transformed values visibly (the last expression is the
 # value itself rather than an assignment).
 transformLog <- function(y) {
   log1p(y)
 }

 # Transform the predictors column by column. lapply() keeps the data-frame
 # structure, whereas apply() first coerces the predictors to a matrix and
 # errors when `indep` names only a single column.
 data[indep] <- lapply(data[indep], transformLog)

 # Recode the response as a two-level factor. The levels are fixed to
 # c("F", "T") so the level order does not depend on which values occur.
 data[, dep] <- factor(ifelse(data[, dep] == "TRUE", "T", "F"),
                       levels = c("F", "T"))

 # Out-of-sample bootstrap: fit the model on a bootstrap sample, evaluate it
 # on the rows that were not drawn, and average the AUC over all repetitions.
 # NOTE: call set.seed() before this point if reproducible results are needed.
 n_boot <- 100L

 # The model formula is the same in every repetition, so build it once
 f <- as.formula(paste0(dep, " ~ ", paste0(indep, collapse = "+")))

 # Preallocate one AUC slot per repetition instead of growing the vector
 performance <- numeric(n_boot)
 for (i in seq_len(n_boot)) {
   # Generate a bootstrap sample with replacement
   indices <- sample(nrow(data), replace = TRUE)

   # Generate training dataset using the bootstrap sample
   training <- data[indices, ]

   # Generate testing dataset (i.e., instances that
   # are not included in the bootstrap sample)
   testing <- data[-unique(indices), ]

   # Fit a prediction model using a logistic regression model.
   # glm() treats the first factor level ("F") as failure, so the fitted
   # probabilities below are probabilities of "T".
   m <- glm(f, data = training, family = "binomial")

   # Extract probabilities using the testing dataset
   prob <- predict(m, newdata = testing, type = "response")

   # Compute AUC performance. Levels and direction are set explicitly:
   # pROC's default direction = "auto" picks whichever direction gives the
   # higher AUC, which biases the estimate upward when resampling.
   performance[i] <- as.numeric(
     auc(testing[, dep], prob, levels = c("F", "T"), direction = "<")
   )
 }

 # Report the average AUC performance
 mean(performance)
	library(DefectData)
	library(pROC)
	# Load the "eclipse-2.0" dataset. The returned object supplies the data
	# frame (`data`), the name of the response column (`dep`) and the names of
	# the predictor columns (`indep`) that the rest of the script works with.
	Data <- loadData("eclipse-2.0")
	data <- Data$data
	dep <- Data$dep
	indep <- Data$indep

	# Normality correction: log(1 + y) transform of a predictor.
	# Returns the transformed values visibly (the last expression is the
	# value itself rather than an assignment).
	transformLog <- function(y) {
	  log1p(y)
	}

	# Transform the predictors column by column. lapply() keeps the data-frame
	# structure, whereas apply() first coerces the predictors to a matrix and
	# errors when `indep` names only a single column.
	data[indep] <- lapply(data[indep], transformLog)

	# Recode the response as a two-level factor. The levels are fixed to
	# c("F", "T") so the level order does not depend on which values occur.
	data[, dep] <- factor(ifelse(data[, dep] == "TRUE", "T", "F"),
	                      levels = c("F", "T"))

	# Out-of-sample bootstrap: fit the model on a bootstrap sample, evaluate it
	# on the rows that were not drawn, and average the AUC over all repetitions.
	# NOTE: call set.seed() before this point if reproducible results are needed.
	n_boot <- 100L

	# The model formula is the same in every repetition, so build it once
	f <- as.formula(paste0(dep, " ~ ", paste0(indep, collapse = "+")))

	# Preallocate one AUC slot per repetition instead of growing the vector
	performance <- numeric(n_boot)
	for (i in seq_len(n_boot)) {
	  # Generate a bootstrap sample with replacement
	  indices <- sample(nrow(data), replace = TRUE)

	  # Generate training dataset using the bootstrap sample
	  training <- data[indices, ]

	  # Generate testing dataset (i.e., instances that
	  # are not included in the bootstrap sample)
	  testing <- data[-unique(indices), ]

	  # Fit a prediction model using a logistic regression model.
	  # glm() treats the first factor level ("F") as failure, so the fitted
	  # probabilities below are probabilities of "T".
	  m <- glm(f, data = training, family = "binomial")

	  # Extract probabilities using the testing dataset
	  prob <- predict(m, newdata = testing, type = "response")

	  # Compute AUC performance. Levels and direction are set explicitly:
	  # pROC's default direction = "auto" picks whichever direction gives the
	  # higher AUC, which biases the estimate upward when resampling.
	  performance[i] <- as.numeric(
	    auc(testing[, dep], prob, levels = c("F", "T"), direction = "<")
	  )
	}

	# Report the average AUC performance
	mean(performance)