Skip to content

Instantly share code, notes, and snippets.

@timflutre
Last active May 8, 2016 12:51
Show Gist options
  • Save timflutre/56e36d4769491c282793 to your computer and use it in GitHub Desktop.
Save timflutre/56e36d4769491c282793 to your computer and use it in GitHub Desktop.
learn how to use various R packages to perform Bayesian inference via sampling
## Goal of this document: learn how to use various R packages to perform
## Bayesian inference via sampling
## Author: Timothée Flutre (INRA)
## I. Model
## sub-section: details about Ga and InvGa distributions
## II. Simulations
## III. Inference via sampling
## III.1 Fit with OpenBUGS (many explanations)
## sub-section: detailed diagnostics with "coda" and "mcmcse"
## III.2 Fit with JAGS
## III.3 Fit with Stan
## III.4 Fit with MCMCglmm
rm(list=ls())
library(parallel)
library(R2OpenBUGS)
library(rjags)
library(MCMCglmm)
## library(mcmc)
library(rstan)
library(coda)
library(mcmcse)
## ============================================================================
## I. Model
## Normal with unknown mean and variance
## Ref: DeGroot (1970, p169), Hoff (2009, p73)
## http://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
## likelihood: for all n in {1,...,N}, y_n | mu, tau ~ N(mu, 1/tau)
## parameters: mean mu and precision tau
## priors: tau ~ G(a_0, b_0) and mu | tau ~ N(m_0, 1/(tau t_0))
## hyper-parameters: shape a_0, rate b_0, mean m_0, precision t_0
## posteriors: tau | y ~ G(a_N, b_N) and mu | y, tau ~ N(m_N, t_N)
## Note: Gamma prior on tau not recommended anymore (Gelman et al, 2006)
## ----------------------------------------------------------------------------
## Remarks about the Gamma and Inverse-Gamma distributions
## On Wikipedia: https://en.wikipedia.org/wiki/Gamma_distribution
## X ~ Ga(k, theta) where k is "shape" and theta is "scale"
## E[X] = k theta and V[X] = k theta^2
## X ~ Ga(alpha, beta) where alpha is "shape" and beta is "rate" (rate=1/scale)
## E[X] = alpha / beta and V[X] = alpha / beta^2
## X ~ Ga(shape, scale) <=> 1/X ~ InvGa(shape, 1/scale)
## In R: http://stat.ethz.ch/R-manual/R-patched/library/stats/html/GammaDist.html
## X ~ Ga(a, s) where a is "shape" and "s" is "scale", but also option "rate"
## In OpenBugs: http://www.openbugs.net/Manuals/ModelSpecification.html
## X ~ Ga(r, mu) where r is "shape" and mu is "rate"
## if X ~ Ga(shape=0.001, rate=0.001) => E[X]=1 and V[X]=1000 (uninformative)
dgamma(x=1, shape=10, scale=0.1) # 1.2511
dgamma(x=1, shape=10, rate=1/0.1) # 1.2511
pdfGa <- function(x, shape, scale=NULL, rate=NULL){
stopifnot(xor(is.null(scale), is.null(rate)))
if(is.null(rate)){
(1 / (gamma(shape) * scale^shape)) * x^(shape - 1) * exp(- x / scale)
} else
rate^shape / gamma(shape) * x^(shape - 1) * exp(- rate * x)
}
pdfGa(x=1, shape=10, scale=0.1) # 1.2511
pdfGa(x=1, shape=10, rate=1/0.1) # 1.2511
pdfInvGa <- function(x, shape, scale){
scale^shape / gamma(shape) * x^(- shape - 1) * exp(- scale / x)
}
pdfInvGa(x=1, shape=10, scale=1/0.1) # 1.2511
library(MCMCpack)
dinvgamma(x=1, shape=10, scale=1/0.1) # 1.2511
sampleInvGa <- function(n, shape, scale){
1 / rgamma(n=n, shape=shape, scale=1/scale)
}
set.seed(1)
x1 <- sampleInvGa(n=1000, shape=2.1, scale=1.1)
head(x1)
summary(x1)
set.seed(1)
x2 <- rinvgamma(n=1000, shape=2.1, scale=1.1)
head(x2)
summary(x2)
## ============================================================================
## II. Simulations
set.seed(7263)
truth <- c()
## prior of tau
a.0 <- 3
truth <- append(x=truth,
values=setNames(object=a.0, nm="a.0"))
b.0 <- 2
truth <- append(x=truth,
values=setNames(object=b.0, nm="b.0"))
truth <- append(x=truth,
values=setNames(object=a.0 / b.0, nm="prior.mean.tau"))
truth <- append(x=truth,
values=setNames(object=a.0 / b.0^2, nm="prior.var.tau"))
plot(x=seq(0,11,0.1),
y=dgamma(x=seq(0,11,0.1), shape=a.0, rate=b.0),
main=paste0("Gamma(shape=", a.0, ", rate=", b.0, ")"),
xlab="x", ylab="pdf", type="l")
(tau <- rgamma(n=1, shape=a.0, rate=b.0))
truth <- append(x=truth,
values=setNames(object=tau, nm="tau"))
truth <- append(x=truth,
values=setNames(object=1 / tau, nm="sigma2"))
truth <- append(x=truth,
values=setNames(object=sqrt(1 / tau), nm="sigma"))
## prior of mu
m.0 <- 73
truth <- append(x=truth,
values=setNames(object=m.0, nm="prior.mean.mu"))
t.0 <- 10^(-1)
truth <- append(x=truth,
values=setNames(object=1 / (tau * t.0), nm="prior.var.mu"))
plot(x=seq(60,86,0.1),
y=dnorm(x=seq(60,86,0.1), mean=m.0, sd=sqrt(1/(tau*t.0))),
main=paste0("Normal(mean=", m.0, ", sd=",
format(sqrt(1/(tau*t.0)), digits=3), ")"),
xlab="x", ylab="pdf", type="l")
(mu <- rnorm(n=1, mean=m.0, sd=sqrt(1/(tau * t.0))))
truth <- append(x=truth,
values=setNames(object=mu, nm="mu"))
## data
N <- 5*10^1
y <- rnorm(n=N, mean=mu, sd=sqrt(1/tau))
summary(y)
hist(y, breaks="FD", freq=FALSE, col="grey", border="white",
main="Simulated data",
ylim=c(0, dnorm(x=mu, mean=mu, sd=sqrt(1/tau))))
abline(v=mu, col="red")
curve(dnorm(x=x, mean=mu, sd=sqrt(1/tau)), add=TRUE, col="red")
## maximum likelihood estimates
truth <- append(x=truth,
values=setNames(object=mean(y), nm="mle.mu"))
truth <- append(x=truth,
values=setNames(object=1 / var(y), nm="mle.tau"))
truth <- append(x=truth,
values=setNames(object=var(y), nm="mle.sigma2"))
truth <- append(x=truth,
values=setNames(object=sd(y), nm="mle.sigma"))
## posterior
a.N <- a.0 + N / 2
truth <- append(x=truth,
values=setNames(object=a.N, nm="a.N"))
b.N <- b.0 + (1/2) * sum((y - mean(y))^2) +
(t.0 * N * (mean(y) - m.0)^2) / (2 * (t.0 + N))
truth <- append(x=truth,
values=setNames(object=b.N, nm="b.N"))
truth <- append(x=truth,
values=setNames(object=a.N / b.N, nm="post.mean.tau"))
truth <- append(x=truth,
values=setNames(object=a.N / b.N^2, nm="post.var.tau"))
truth <- append(x=truth,
values=setNames(object=b.N / (a.N - 1), nm="post.mean.sigma2"))
truth <- append(x=truth,
values=setNames(object=b.N^2 / ((a.N - 1)^2 * (a.N - 2)),
nm="post.var.sigma2"))
m.N <- (t.0 * m.0 + N * mean(y)) / (t.0 + N)
truth <- append(x=truth,
values=setNames(object=m.N, nm="post.mean.mu"))
t.N <- (t.0 + N)
truth <- append(x=truth,
values=setNames(object=1 / (t.N * a.N / b.N),
nm="post.var.mu"))
## graphical overview of the analytical inference
plot(x=0, y=0, type="n", xlim=c(65,82), ylim=c(0,2),
xlab="x", ylab="density",
main="Bayesian inference of a Normal with unknown mean and variance")
curve(dnorm(x=x, mean=truth["prior.mean.mu"], sd=sqrt(truth["prior.var.mu"])),
add=TRUE, col="green", lwd=2)
curve(dnorm(x=x, mean=mu, sd=sqrt(1/tau)), add=TRUE, col="black", lwd=2)
curve(dnorm(x=x, mean=truth["post.mean.mu"], sd=sqrt(truth["post.var.mu"])),
add=TRUE, col="red", lwd=2)
legend(x="topright", legend=c("prior(mu|tau)", "likelihood(y;mu,tau)",
"posterior(mu|y,tau)"),
col=c("green","black","red"), lwd=2, bty="n")
## ============================================================================
## III. Inference via sampling
nb.chains <- 4
nb.iters <- 13000
nb.cores <- 1 # need to check how to set seeds in parallel before setting it > 1
##' MCMC diagnostics
##'
##' Plot the trace, autocorrelation, running average and density side by side for a single chain.
##' TODO: show dark background for burn-in in traceplot
##' @param res.mcmc object of class "mcmc" (not "mcmc.list"!)
##' @param param.name parameter name in the columns of res.mcmc
##' @param subplots vector indicating which sub-plot(s) to make (1 for trace, 2 for autocorrelation, 3 for running quantiles, 4 for density)
##' @param pe point estimate (optional, but required if "hi" is given)
##' @param hi half-interval (optional)
##' @return nothing
##' @author Timothee Flutre
plot.mcmc.chain <- function(res.mcmc, param.name, subplots=1:4,
pe=NULL, hi=NULL){
library(coda)
stopifnot(is.mcmc(res.mcmc),
is.character(param.name),
param.name %in% colnames(res.mcmc),
is.vector(subplots), is.numeric(subplots))
if(! is.null(hi))
stopifnot(! is.null(pe))
changed.par <- FALSE
if(1 %in% subplots & 2 %in% subplots & 3 %in% subplots & 4 %in% subplots &
all(par("mfrow") == c(1, 1))){
par(mfrow=c(2,2))
changed.par <- TRUE
}
if(1 %in% subplots){
## ts.plot(res.mcmc[, param.name],
coda::traceplot(res.mcmc[, param.name],
## xlab="Iterations",
ylab="Trace",
main=paste0(param.name))
if(! is.null(pe)){
abline(h=pe, col="red")
if(! is.null(hi)){
abline(h=pe + 2 * hi, col="red", lty=2)
abline(h=pe - 2 * hi, col="red", lty=2)
}
}
}
if(2 %in% subplots){
## acf(res.mcmc[, param.name],
autocorr.plot(res.mcmc[, param.name],
main=paste0(param.name),
## xlab="Lag",
## ylab="Autocorrelation",
auto.layout=FALSE,
ask=FALSE)
}
if(3 %in% subplots){
## ts.plot(cumsum(res.mcmc[, param.name]) / seq(along=res.mcmc[, param.name]),
cumuplot(res.mcmc[, param.name],
probs=c(0.025, 0.5, 0.975),
main=paste0(param.name),
## xlab="Iterations",
ylab="Running quantiles",
auto.layout=FALSE,
ask=FALSE)
if(! is.null(pe)){
abline(h=pe, col="red")
if(! is.null(hi)){
abline(h=pe + 2 * hi, col="red", lty=2)
abline(h=pe - 2 * hi, col="red", lty=2)
}
}
}
if(4 %in% subplots){
## plot(density(res.mcmc[, param.name]), type="l",
densplot(res.mcmc[, param.name],
ylab="Density",
main=paste0(param.name))
if(! is.null(pe)){
abline(v=pe, col="red")
if(! is.null(hi)){
abline(v=pe + 2 * hi, col="red", lty=2)
abline(v=pe - 2 * hi, col="red", lty=2)
}
}
}
if(changed.par)
par(mfrow=c(1,1))
}
## ----------------------------------------------------------------------------
## III.1 Fit with OpenBUGS
## priors: mu ~ N(0, 10^8) and sigma^2 ~ IG(0.001,0.001)
model.file <- "model_bugs.txt"
model <- function(){
for(i in 1:N){
y[i] ~ dnorm(mu, tau)
}
mu ~ dnorm(0, 1.0E-8)
tau ~ dgamma(0.001, 0.001)
sigma2 <- 1 / tau
}
write.model(model=model, con=model.file)
inits <- function(nb.chains=1){
return(lapply(1:nb.chains, function(c){list(mu=0, tau=1)}))
}
(st.r2ob <- system.time(
fit.r2ob <- bugs(data=list(N=N, y=y),
inits=inits(nb.chains),
parameters.to.save=c("mu", "sigma2"),
n.iter=nb.iters,
model.file=model.file,
n.chains=nb.chains,
n.burnin=0,
n.thin=1,
DIC=TRUE,
codaPkg=FALSE, # use "as.mcmc.list"
saveExec=TRUE,
restart=FALSE, # used when resampling
working.directory=getwd())
))
fit.r2ob
## convert output into a format ready for the "coda" package
res.r2ob <- as.mcmc.list(fit.r2ob)
## now, we have to perform diagnostics (see sub-section below)
## ...
## after diagnostics, sometimes, we may want to run more iterations:
fit.r2ob <- bugs(data=list(N=N, y=y),
inits=inits(nb.chains),
parameters.to.save=c("mu", "sigma2"),
n.iter=nb.iters,
model.file=model.file,
n.chains=nb.chains,
n.burnin=0, # don't change so that prev iterations are kept
n.thin=1,
DIC=TRUE,
codaPkg=FALSE,
saveExec=TRUE,
restart=TRUE, # this changed compare to the first call
working.directory=getwd())
fit.r2ob # notice iterations from both first and second runs
## go back to the diagnostics, check if more iterations are needed
## if not, remove burn-in and thin if necessary
## and report posterior means and variances
## as well as their Monte Carlo standard errors
## -------------------------------------
## Diagnostics and MCSEs
res <- res.r2ob # so that code of this sub-section works whatever the sampler
## plot diagnostics for a single chain
chain.idx <- 1
par(mfcol=c(4,2))
plot.mcmc.chain(res[[chain.idx]], "mu",
pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))
plot.mcmc.chain(res[[chain.idx]], "sigma2",
pe=truth["post.mean.sigma2"])
par(mfrow=c(1,1))
## conclusion: convergence seem to have been reached; no lag
## plot diagnostics for all chains but only for "mu"
par(mfcol=c(4,nb.chains))
for(chain.idx in 1:nb.chains)
plot.mcmc.chain(res[[chain.idx]], "mu",
pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))
par(mfrow=c(1,1))
## same conclusion per chain; same results across chains
## plot diagnostics for all chains but only for "sigma2"
par(mfcol=c(4,nb.chains))
for(chain.idx in 1:nb.chains)
plot.mcmc.chain(res[[chain.idx]], "sigma2",
pe=truth["post.mean.sigma2"])#, hi=sqrt(post.var.sigma2))
par(mfrow=c(1,1))
## idem; note the skewness of the posterior of sigma2
## plot trace for all chains and all parameters
par(mfrow=c(nb.chains,2))
for(chain.idx in 1:nb.chains){
plot.mcmc.chain(res[[chain.idx]], "mu", subplots=1,
pe=truth["post.mean.mu"], hi=sqrt(truth["post.var.mu"]))
plot.mcmc.chain(res[[chain.idx]], "sigma2", subplots=1,
pe=truth["post.mean.sigma2"])#, hi=sqrt(post.var.sigma2))
}
## Gelman-Rubin R-hat
gelman.plot(res)
## non-graphical diagnostics
lapply(res, autocorr.diag)
t(sapply(res, effectiveSize))
(gel <- gelman.diag(res))
geweke.diag(res)
heidel.diag(res)
raftery.diag(res)
## at this step, it may be necessary to resample
## if yes, go back to the section of each sampler to see how it is done
## if this is still not enough, the model may have to be re-parametrized
## or even modified substantially, correlated variables may have to be updated
## together ("block"), auxiliary variables may have to be added ("slice"),
## try sophisticated schemes (e.g. parallel tempering) or even whole other
## methods (HMC, particle MCMC)...
## depending on the diagnostics above, apply burn-in and thinning
## but they are not always needed (Geyer, Handbook of MCMC, 2011, ch1)
burnin <- 1000
thin <- 1
res <- window(x=res, start=1 + burnin, end=nb.iters, thin=thin)
## check again with all chains pooled
autocorr.diag(res)
effectiveSize(res)
## summarize the inference
summary(res)
## mu: post.mean=73.705 ts.SE=0.0009981
## sigma2: post.mean=2.417 ts.SE=0.0023422
plot(res)
## estimate mean and Monte Carlo standard errors (MCSE) for "mu" and "sigma2"
(post.means <- mcse.mat(x=do.call(rbind, res)[,c("mu","sigma2")],
method="bm", g=NULL))
## TODO: for MCSEs, check if it's right to pool draws from all chains
## see Flegal & Jones, Handbook of MCMC, 2011, ch7
## but should we permute draws, as in "rstan"?
## TODO: estimate variance and MCSE for "mu" and "sigma2"
## calculate effective sample size with "mcmcse"
ess(do.call(rbind, res)[,c("mu","sigma2")])
multiESS(do.call(rbind, res)[,c("mu","sigma2")])
par(mfrow=c(1,2))
estvssamp(do.call(rbind, res)[,"mu"])
estvssamp(do.call(rbind, res)[,"sigma2"])
## ----------------------------------------------------------------------------
## III.2 Fit with JAGS
## priors: mu ~ N(0, 10^8) and sigma^2 ~ IG(0.001,0.001)
model.file <- "model_jags.txt"
model <- function(){
for(i in 1:N){
y[i] ~ dnorm(mu, tau)
}
mu ~ dnorm(0, 1.0E-8)
tau ~ dgamma(0.001, 0.001)
sigma2 <- 1 / tau
}
write.model(model=model, con=model.file) # useful fct from R2OpenBUGS
inits <- function(nb.chains=1){
return(lapply(1:nb.chains, function(c){list(mu=0, tau=1)}))
}
jags <- jags.model(file=model.file,
data=list(N=N, y=y),
inits=inits(nb.chains),
n.chains=nb.chains,
n.adapt=0,
quiet=TRUE)
(st.rjags <- system.time(
res.rjags <- coda.samples(model=jags, variable.names=c("mu", "sigma2"),
n.iter=nb.iters, thin=1)
))
## play with graphical diagnostics as for R2OpenBUGS
## ...
## re-sample if necessary
res.rjags <- coda.samples(model=jags, variable.names=c("mu", "sigma2"),
n.iter=nb.iters, thin=1)
## ----------------------------------------------------------------------------
## III.3 Fit with Stan
## priors: mu ~ C(0, 5) and sigma ~ |C(0, 5)|
model <- "data {
int<lower=0> N;
vector[N] y;
}
parameters {
real mu;
real<lower=0> sigma;
}
transformed parameters {
real<lower=0> sigma2;
sigma2 <- sigma^2;
}
model {
sigma ~ cauchy(0, 5); // implicit half-Cauchy
mu ~ cauchy(0, 5);
y ~ normal(mu, sigma);
}"
model.file <- "model.stan"
write(x=model, file=model.file)
rt <- stanc(file=model.file, model_name="Simple Normal")
sm <- stan_model(stanc_ret=rt, verbose=FALSE)
save(sm, file="rstan_sm.RData")
load("rstan_sm.RData")
warmup <- 100
st.rstan <- system.time(
fit.rstan <- sampling(object=sm,
data=list(N=N, y=y),
iter=nb.iters + warmup,
warmup=warmup,
thin=1,
chains=nb.chains)
)
traceplot(object=fit.rstan, ask=FALSE)
traceplot(object=fit.rstan, ask=FALSE, inc_warmup=FALSE)
traceplot(object=fit.rstan, pars="mu", ask=FALSE)
print(fit.rstan)
res.rstan <- as.mcmc.list(
lapply(1:dim(fit.rstan)[2],
function(chain.idx){
as.mcmc(as.array(fit.rstan)[,chain.idx,])
}))
## play with graphical diagnostics as for R2OpenBUGS
## ...
burnin <- 2000
thin <- 1
res.rstan <- window(x=res.rstan, start=1 + burnin, end=nb.iters-warmup,
thin=thin)
## ...
## TODO: show how to re-sample, starting at the current position of each chain
## ----------------------------------------------------------------------------
## III.4 Fit with MCMCglmm
## priors: mu ~ N(0, 10^8) and sigma^2 ~ IG(0.001,0.001)
## TODO: set seeds for parallel chains?
(st.mcmcglmm <- system.time(
fit.mcmcglmm <-
mclapply(1:nb.chains, function(c){
MCMCglmm(y ~ 1, data=data.frame(y=y),
prior=list(B=list(mu=0, V=10^8),
R=list(V=1, nu=0.002)),
nitt=nb.iters, thin=1, burnin=0,
verbose=FALSE)
}, mc.cores=nb.cores)
))
res.mcmcglmm <- as.mcmc.list(
lapply(1:nb.chains, function(c){
mcmc(data=cbind(fit.mcmcglmm[[c]]$Sol,
fit.mcmcglmm[[c]]$VCV))
}))
## play with graphical diagnostics as for R2OpenBUGS
## ...
## TODO: show how to re-sample, starting at the current position of each chain
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment