library(mvtnorm)   # multivariate normal density (dmvnorm)
library(ggplot2)
library(MCMCpack)  # inverse-gamma sampler (rinvgamma)

# Prior over each feature's univariate Gaussian, plus hyper-parameters for the
# inverse-gamma prior on its variance.
expected_task <- list(mean = 3, sd = 2)
hyper_parameters <- list(alpha = 1, beta = 1)

# Synthetic univariate samples for experimenting with the MAP search.
data_samples <- rnorm(100, 6, 3)
random_samples <- rnorm(100, 20, 5)
# Gaussian likelihood of a single observation under the candidate model.
find_likelyhood <- function(target_model, target) {
  return(dnorm(target, target_model$mean, target_model$sd))
}
# Log posterior (up to a constant) of a candidate model: the log prior density
# of the candidate mean plus the log-likelihood of the data.
parameter_log_likelyhood <- function(data_samples, prior_model, target_model) {
  likely_hood <- 0
  log_prior <- log(dnorm(target_model$mean, prior_model$mean, prior_model$sd))
  for (index in 1:length(data_samples)) {
    sample <- find_likelyhood(target_model, data_samples[index])
    likely_hood <- log(sample) + likely_hood
  }
  return(likely_hood + log_prior)
}
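
# Illustrative sketch (not part of the original gist): with expected_task used
# as both the prior and the candidate model, the loop above reduces to a
# vectorised sum of log densities plus the log prior of the candidate mean.
# The name vectorised_log_post is introduced here for illustration only.
vectorised_log_post <- sum(dnorm(data_samples, expected_task$mean, expected_task$sd, log = TRUE)) +
  dnorm(expected_task$mean, expected_task$mean, expected_task$sd, log = TRUE)
# Compare with: parameter_log_likelyhood(data_samples, expected_task, expected_task)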
# Draw a new variance for the candidate model from its inverse-gamma
# conditional given the current mean, then shift the mean by that draw.
new_sd_based_on_mean <- function(data_samples, target_model, hyper_parameter) {
  mean_sum <- 0
  for (index in 1:length(data_samples)) {
    # squared deviation from the candidate mean (conjugate inverse-gamma update)
    mean_sum <- mean_sum + (data_samples[index] - target_model$mean)^2
  }
  mean_sum <- mean_sum / 2
  alpha <- hyper_parameter$alpha + (length(data_samples) / 2)
  beta <- hyper_parameter$beta + mean_sum
  if (beta < 1) {
    target_model$sd <- 1
    target_model$mean <- target_model$mean + target_model$sd
    return(target_model)
  }
  target_model$sd <- rinvgamma(1, alpha, beta)
  target_model$mean <- target_model$mean + target_model$sd
  return(target_model)
}
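
# Illustrative sketch (not part of the original gist): one conditional draw on
# the synthetic samples above, with expected_task as the candidate model.
# The name example_draw is introduced here for illustration only.
example_draw <- new_sd_based_on_mean(data_samples, expected_task, hyper_parameters)
# example_draw$sd holds the inverse-gamma draw; example_draw$mean has been shifted by it.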
# Random-walk search for the MAP estimate: jitter the mean, resample the
# variance conditional on it, and keep the best-scoring candidate seen so far.
find_map <- function(data_samples, prior, hyper_parameters, iter = 100, max_jump = 1) {
  ret_val <- prior
  best_log <- -999999
  for (iteration in 1:iter) {
    permute <- ret_val
    # Change mean: step down or up by a uniform amount
    if (sample(0:1, 1) == 0) {
      permute$mean <- permute$mean - runif(1, 0, max_jump)
    } else {
      permute$mean <- permute$mean + runif(1, 0, max_jump)
    }
    # Change sd based on the new mean
    permute <- new_sd_based_on_mean(data_samples, permute, hyper_parameters)
    if (permute$sd > 0 && permute$mean >= 1) {
      current_log <- parameter_log_likelyhood(data_samples, prior, permute)
      if (!is.nan(current_log) && current_log > best_log) {
        best_log <- current_log
        ret_val <- permute
      }
    }
  }
  return(ret_val)
}
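
# Illustrative sketch (not part of the original gist): a single MAP search over
# the synthetic samples defined at the top of the script. The name example_map
# is introduced here for illustration only.
example_map <- find_map(data_samples, expected_task, hyper_parameters, iter = 100, max_jump = 1)
# example_map$mean and example_map$sd hold the best-scoring candidate found.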
# Repeatedly restart find_map from the current best estimate, keeping the
# highest-scoring model across restarts.
optimise_map <- function(data_set, starting_prior, hyper_parameters, jump_factor = 5, iter = 100) {
  ret_val <- starting_prior
  best_log <- -999999
  for (iteration in 1:iter) {
    test_model <- find_map(data_set, ret_val, hyper_parameters, 100, jump_factor)
    score <- parameter_log_likelyhood(data_set, ret_val, test_model)
    if (score > best_log) {
      best_log <- score   # track the best score so later candidates are compared against it
      ret_val <- test_model
    }
  }
  return(ret_val)
}
# Fit a univariate MAP model to each feature (x1, x2) of each class (y = 1, y = -1).
find_model_map <- function(data_set, prior) {
  model_1_data <- subset(data_set, y == 1, select = c(x1, x2))
  model_2_data <- subset(data_set, y == -1, select = c(x1, x2))
  model_1_data.x1.data <- model_1_data$x1
  model_1_data.x2.data <- model_1_data$x2
  model_2_data.x1.data <- model_2_data$x1
  model_2_data.x2.data <- model_2_data$x2
  model_1_data.x1.map <- optimise_map(model_1_data.x1.data, prior, hyper_parameters)
  model_1_data.x2.map <- optimise_map(model_1_data.x2.data, prior, hyper_parameters)
  model_2_data.x1.map <- optimise_map(model_2_data.x1.data, prior, hyper_parameters)
  model_2_data.x2.map <- optimise_map(model_2_data.x2.data, prior, hyper_parameters)
  ret_val <- list(m1.x1.map = model_1_data.x1.map, m1.x2.map = model_1_data.x2.map,
                  m2.x1.map = model_2_data.x1.map, m2.x2.map = model_2_data.x2.map)
  return(ret_val)
}
# Random search over the off-diagonal covariance term: build a 2x2 covariance
# with the per-feature MAP sd values on the diagonal and a candidate
# off-diagonal term, score the class data under the multivariate normal, and
# keep the best-scoring matrix.
cov_matrix_optimiser <- function(model_data, x1_map, x2_map, iter = 1000) {
  mu0 <- runif(1, -5, 5)
  b_mu <- mu0
  cov_matrix_m1 <- matrix(c(x1_map$sd, mu0, mu0, x2_map$sd), nrow = 2)
  mean_vector_m1 <- c(x1_map$mean, x2_map$mean)
  best_log <- -999999
  best_matrix <- c()
  for (iteration in 1:iter) {
    # Accumulate the log-likelihood of the dataset under the current covariance
    current_log <- 0
    for (index in 1:nrow(model_data)) {
      point_vector <- c(model_data$x1[index], model_data$x2[index])
      current_log <- current_log + log(dmvnorm(point_vector, mean_vector_m1, cov_matrix_m1))
    }
    if (current_log > best_log) {
      best_log <- current_log
      best_matrix <- cov_matrix_m1
      b_mu <- mu0
    }
    # Mutate the off-diagonal term around the best value found so far
    mu0 <- b_mu + runif(1, -10, 10)
    cov_matrix_m1 <- matrix(c(x1_map$sd, mu0, mu0, x2_map$sd), nrow = 2)
  }
  #print("Best Cov Matrix: ")
  #print(best_matrix)
  return(best_matrix)
}
# Train a Gaussian Bayes classifier: per-class mean vectors from the MAP
# estimates and per-class covariance matrices from the random search above.
train_bayes <- function(data_set) {
  model_1_data <- subset(data_set, y == 1, select = c(x1, x2))
  model_2_data <- subset(data_set, y == -1, select = c(x1, x2))
  model <- find_model_map(data_set, expected_task)
  mean_vector_m1 <- c(model$m1.x1.map$mean, model$m1.x2.map$mean)
  mean_vector_m2 <- c(model$m2.x1.map$mean, model$m2.x2.map$mean)
  cov_m1 <- cov_matrix_optimiser(model_1_data, model$m1.x1.map, model$m1.x2.map)
  cov_m2 <- cov_matrix_optimiser(model_2_data, model$m2.x1.map, model$m2.x2.map)
  # Calculate predictions: label 1 if the class-1 density is higher, else -1
  predict <- function(T) {
    return(ifelse(
      dmvnorm(x = T, mean = mean_vector_m1, sigma = cov_m1) > dmvnorm(x = T, mean = mean_vector_m2, sigma = cov_m2),
      1,
      -1))
  }
  return(list("predict" = predict))
}
# Percentage of predictions that disagree with the true labels.
classification_error_func <- function(T_hat, T) {
  return(sum(T_hat != T) / length(T) * 100)
}
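
# Illustrative usage sketch (not part of the original gist): assumes a data
# frame with feature columns x1, x2 and labels y in {1, -1}, which is the shape
# subset() expects inside train_bayes(). Left commented out because the random
# searches take a while; example_data, bayes_model and predictions are names
# introduced here for illustration only.
# example_data <- data.frame(
#   x1 = c(rnorm(50, 6, 3), rnorm(50, 20, 5)),
#   x2 = c(rnorm(50, 6, 3), rnorm(50, 20, 5)),
#   y  = c(rep(1, 50), rep(-1, 50))
# )
# bayes_model <- train_bayes(example_data)
# predictions <- bayes_model$predict(as.matrix(example_data[, c("x1", "x2")]))
# classification_error_func(predictions, example_data$y)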