-
-
Save myrddian/b2d364a0930af11b683a7bf8193700d9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#returns a randomly generated dirichlet | |
random_dirichlet <- function(n_var, alpha, beta) { | |
return_val <- 0 | |
for(i in 1:n_var) { | |
return_val[i] <- rgamma(1,shape=alpha,scale = beta) | |
} | |
#Normalise and round to a nice values | |
return_val <- round(return_val/sum(return_val) * 100,1)/100 | |
return(return_val) | |
} | |
entropy_x_y_d <- function(dataset, alpha) | |
{ | |
#Get sample sizes #Add dirichlet | |
for(i in 1:length(dataset$AUSTRALIA)) | |
{ | |
#Generate a dirichlet, 5 variables, alpha set to the input parameted and beta 1 | |
dirichlet <- random_dirichlet(5,alpha,1) | |
dataset$AUSTRALIA[i] <- dataset$AUSTRALIA[i] + round(dirichlet[1]* alpha,0) | |
dataset$CHINA[i] <- dataset$CHINA[i] + round(dirichlet[2]* alpha,0) | |
dataset$ENGLAND[i] <- dataset$ENGLAND[i] + round(dirichlet[3]* alpha,0) | |
dataset$INDIA[i] <- dataset$INDIA[i] + round(dirichlet[4]* alpha,0) | |
dataset$OTHERS[i] <- dataset$OTHERS[i] + round(dirichlet[5]* alpha,0) | |
} | |
dataset$lga_size <- dataset$AUSTRALIA + dataset$CHINA + dataset$ENGLAND + dataset$INDIA + dataset$OTHERS | |
total_size <- sum(dataset$lga_size) | |
#Calculate expected value | |
dataset$lga_e <- ( | |
(-1 * ( (dataset$AUSTRALIA / dataset$lga_size ) * log2(dataset$AUSTRALIA / dataset$lga_size) )) + | |
(-1 * ( (dataset$ENGLAND / dataset$lga_size ) * log2(dataset$ENGLAND / dataset$lga_size) ) ) + | |
(-1 * ( (dataset$CHINA / dataset$lga_size ) * log2(dataset$CHINA / dataset$lga_size) ) ) + | |
(-1 * ( (dataset$INDIA / dataset$lga_size ) * log2(dataset$INDIA / dataset$lga_size) ) ) + | |
(-1 * ( (dataset$OTHERS / dataset$lga_size ) * log2(dataset$OTHERS / dataset$lga_size) ) ) | |
) | |
#Pre-compute mean weighted average and multiply with the expected Entropy value | |
dataset$lg_mwa <- (dataset$lga_size/total_size ) * dataset$lga_e | |
#Sum the values | |
t_ent <- sum(dataset$lg_mwa) | |
return(t_ent) | |
} | |
#Generate the plot data | |
for(i in 1:100) | |
{ | |
#Add 5 to increase the starting bias as anythign <5 will cause it to generate 0 | |
#Values | |
plot_points[i] <- entropy_x_y_d(data, i + 5) | |
} | |
#Plot the data | |
plot(seq(1,100), plot_points, ylab="Entropy (in bits)", xlab="Alpha size") | |
#Initial starting condition 0-set array and the test beta | |
ex_b = c(0.43, -1.7, 2.8) | |
initial_theta <- rep(0,ncol(XTest)) | |
#Z Function its the function that adds all the inputs and returns | |
#A value to the tanh activator | |
q2.util.z <- function(beta, i_num, x_row) { | |
zv <- beta[1] + x_row[2]*beta[2] + x_row[3]*beta[3] | |
return(zv) | |
} | |
#Various utility functions that cover steps of the | |
#the log likelihood | |
q2.util.p <- function(i_z) return(exp(i_z) + exp(-1*i_z)) #Log of the thanh activator function | |
q2.util.l <- function(p_z) return(-1 *log(p_z)) #Returns the -log of the value | |
q2.util.yb <- function(y,p_z) return(y*p_z) #multiplies Y by p_z | |
q2.util.norm <- function(v,n) return(v/n) #divides all the values by n | |
#This function is the loglikelihood function as provided | |
#The algorithm is basic, it adds up all the | |
#values according to the function supplied | |
q2.t_log <- function(init_b=ex_b,X=XTest,Y=YTest) | |
{ | |
n <- length(Y) | |
sum_t <-0 | |
for(i in 1:n) | |
{ | |
x_r <- X[i,] | |
zv <- q2.util.z(init_b,i,x_r) | |
pv <- q2.util.p(zv) | |
ll <- (q2.util.l(pv) + q2.util.yb(Y[i],zv)) | |
sum_t <- sum_t + ll | |
} | |
sum_t <- (sum_t ) * -1 | |
return(sum_t) | |
} | |
#This is the gradient decent function | |
#It is based on the derivative of the provided log likelihood function | |
#the form yi*xi - bi*tanh(z) | |
#where z = b0x0 + b1x1 + b2x2 | |
#While a specifc special derived b0 function was calculated | |
#I found that the function worked with the general function | |
q2.t_gradient <- function(init_b=ex_b,X=XTest,Y=YTest) | |
{ | |
n <- length(Y) | |
alpha <- 0.00526 #Value experimentally determined for stability | |
gradient <- init_b #copy the array | |
for(i in 1:n) | |
{ | |
x_r <- X[i,] | |
zv <- q2.util.z(gradient,i,x_r) | |
#Work out the gradient using the formula | |
b0 <- ((Y[i]*x_r[1]) - (gradient[1]*tanh(zv))) | |
b1 <- ((Y[i]*x_r[2]) - (gradient[2]*tanh(zv))) | |
b2 <- ((Y[i]*x_r[3]) - (gradient[3]*tanh(zv))) | |
#Once the gradients are calculated put them back into the array | |
#following the formula x = x - alpha*gradient(x) | |
gradient[1] = gradient[1] - (alpha * b0 ) | |
gradient[2] = gradient[2] - (alpha * b1 ) | |
gradient[3] = gradient[3] - (alpha * b2 ) | |
} | |
return(gradient) | |
} | |
q2.var.normal <- 0 | |
q2.var.normal.sd <- 10 | |
#Ok the Guasssian theory for P(BETA|X) = (P(X|BETA)P(BETA))/P(X) | |
#The log taken is: log P(BETA) + log P(X|BETA) −log P(X) | |
#Normally P(X) is ignored following: https://wiseodd.github.io/techblog/2017/01/01/mle-vs-map/ | |
#Give us log P(BETA) + log P(X|BETA) - log P(X|BETA) is our MLE to begin with! | |
#Where P(BETA) = Normal(BETA) | |
#So We take our MLE and ADD the P(BETA) Component to the solver | |
#This function calculates the log of the normal distribution using the PDF | |
#https://www.ime.unicamp.br/~cnaber/optim%202.pdf | |
#Following the document the beta prior is calculated as log of the pnorm | |
#Not the LogNorm distribution, if follows (1/sd*2)*b^2 | |
#Gives us the log of the norm in the P(BETA) Component | |
q2.util.p_beta <- function(beta) { | |
lp1 <- beta[1]^2 | |
lp2 <- beta[2]^2 | |
lp3 <- beta[3]^2 | |
lp <- (lp1 + lp2 + lp3)*(1/(q2.var.normal.sd*2)) #Finally compute the log prior | |
return(lp) #the betas n-curve prior | |
} | |
q2.t_blog <- function(init_b=ex_b,X=XTest,Y=YTest) | |
{ | |
n <- length(Y) | |
sum_t <-0 | |
lp <- q2.util.p_beta(init_b) | |
for(i in 1:n) | |
{ | |
x_r <- X[i,] | |
zv <- q2.util.z(init_b,i,x_r) | |
pv <- q2.util.p(zv) | |
ll <- (q2.util.l(pv) + q2.util.yb(Y[i],zv)) | |
sum_t <- sum_t + ll | |
} | |
sum_t <- (sum_t * -1) + lp | |
return(sum_t) | |
} | |
print("Sanity check the likelihood function") | |
q2.t_log() | |
q2.t_blog() | |
print("Sanity Check the gradient - see if its close to the given beta when it returns") | |
q2.t_gradient() | |
print("Sanity Check the gradient - see if its behaves with a 0 set") | |
q2.t_gradient(init_b=initial_theta) | |
print("Optim - no Gradient used - No MAP") | |
optim(par=initial_theta,X=X,Y=Y,fn=q2.t_log)$par | |
print("BFGS - Gradient used - No MAP") | |
optim(par=initial_theta,X=X,Y=Y,fn=q2.t_log,gr=q2.t_gradient, method="BFGS")$par | |
print("Optim - no Gradient used - using MAP") | |
optim(par=initial_theta,X=X,Y=Y,fn=q2.t_blog)$par | |
print("BFGS - Gradient used - using MAP") | |
optim(par=initial_theta,X=X,Y=Y,fn=q2.t_blog,gr=q2.t_gradient, method="BFGS")$par |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment