Last active
May 4, 2021 11:52
-
-
Save mschauer/3ffebdb8581f56341a9d to your computer and use it in GitHub Desktop.
Kalman filter, Rauch-Tung-Striebel smoother and parameter estimation with EM procedure for the state space model (Shumway, Stoffer)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Kalman | |
using Distributions | |
export LinearDS, generate, kalmanfilter, kalmanfilter!, kalmanrts, kalmanrts!, kalmanEM, randmvn | |
""" | |
Sampling singular semidefinite multivariate normal distributions | |
x = randmvn(S) | |
S = cholfact(A, :L, Val{true}) | |
x ~ N(0, A) | |
""" | |
randmvn(S) = ipermute!(S[:L]*permute!(randn(size(S,1)),S[:p]), S[:p]) | |
function randmvn(S::Base.LinAlg.CholeskyPivoted, n) | |
d = size(S,1) | |
Z = zeros(d, n) | |
for i in 1:n | |
Z[:,i] = randmvn(S) | |
end | |
Z | |
end | |
""" | |
Linear dynamical model with linear observation scheme | |
""" | |
type LinearDS | |
# Initial x1 ~ N(x0, P0) | |
x0 # d | |
P0 # dxd | |
# Evolution x -> Phi x + b + w, w ~ N(0, Q) | |
Phi # dxd | |
b # d | |
Q # dxd | |
# Observational model y = H*x + v, v ~ N(0, R) | |
H # d2xd | |
R # d2xd2 | |
end | |
""" | |
Generate observations | |
Y, X = generate(n, m, M::LinearDS) | |
n -- number of observations per simulation | |
m -- number of independent simulations | |
M -- linear dynamic system and observation model | |
Y -- observations (d2xnxm array) | |
X -- simulated processes (dxnxm array) | |
""" | |
function generate(n, m, M::LinearDS) | |
H, Phi, b = M.H, M.Phi, M.b | |
d2, d = size(M.H) | |
X = zeros(d, n, m) | |
Y = zeros(d2, n, m) | |
Nv = MvNormal(zeros(d2), M.R) | |
S = cholfact(M.Q, :L, Val{true}) # allow singular Q | |
Sx0 = cholfact(M.P0, :L, Val{true}) # | |
#Nw = MvNormal(zeros(d), M.Q) | |
#Nx0 = MvNormal(M.x0, M.P0) | |
for j in 1:m | |
x = M.x0 + randmvn(Sx0) | |
V = rand(Nv,n) | |
W = randmvn(S,n) | |
X[:, 1, j] = Phi*x + b + W[:, 1] | |
Y[:, 1, j] = H*x + V[:, 1] | |
for i in 2:n | |
x[:] = Phi*x + b + W[:, i] | |
X[:, i, j] = x | |
Y[:, i, j] = H*x + V[:, i] | |
end | |
end | |
return Y,X | |
end | |
# single Kalman filter step | |
function kalman_kernel(x, y, P, H, Phi, b, Q, R) | |
# prediction | |
x = Phi*x + b | |
Ppred = Phi*P*Phi' + Q | |
#correction | |
yres = y - H*x # innovation residual | |
S = H*Ppred*H' + R # innovation covariance | |
S = (S+S')/2 # symmetrize | |
K = Ppred*H'/S # Kalman gain | |
x = x + K*yres | |
P = (I - K*H)*Ppred # Ppred - Ppred*H' * inv(H*Ppred*H' + R) * H* Ppred | |
Sl = chol(S, :L) | |
ll = -((norm(Sl\yres))^2 + 2sum(log(diag(Sl))))/2 # constant messes up convergence test | |
#ll = -((norm(Sl\yres))^2 + 2sum(log(diag(Sl))) + length(y)*log(2pi))/2 | |
x, P, Ppred, ll, K | |
end | |
""" | |
Kalman filter | |
ll, xxf = kalmanfilter(yy::Matrix, M::LinearDS) | |
yy -- d2xn array | |
M -- linear model | |
ll -- marginal likelihood | |
xxf -- filtered process | |
""" | |
function kalmanfilter(yy::Matrix, M::LinearDS) | |
d = size(M.Phi,1) | |
n = size(yy,2) | |
kalmanfilter!(yy, zeros(d,n), zeros(d, d, n), zeros(d, d, n), M) | |
end | |
function kalmanfilter!(yy, xxf, PP, PPpred, M::LinearDS) | |
H, Phi, b, Q, R = M.H, M.Phi, M.b, M.Q, M.R | |
d2, d = size(M.H) | |
assert(ndims(yy) == 2) | |
n = size(yy,2) | |
xf = M.x0 | |
P = M.P0 | |
ll = 0.0 | |
for i in 1:n | |
xf, P, Ppred, l, _ = kalman_kernel(xf, yy[:, i], P, H, Phi, b, Q, R) | |
xxf[:, i], PP[:, :, i], PPpred[:, :, i] = xf, P, Ppred | |
ll += l | |
end | |
xxf, PP, PPpred, ll | |
end | |
function smoother_kernel(xs, Ps, xf, Pf, Ppred, Phi, b) | |
# xs -- previous | |
# Ps -- previous | |
# xf -- xxf[:, i] | |
# Pf -- PPf[:, :, i] | |
# Ppred -- PPpred[:, :, i+1] | |
J = Pf*Phi'/Ppred | |
xs = xf + J*(xs - (Phi*xf + b)) | |
Ps = Pf + J*(Ps - Ppred)*J' | |
xs, Ps, J | |
end | |
""" | |
Rauch-Tung-Striebel smoother | |
xxs, PP, PPpred, ll = kalmanrts(yy, xxs, M::LinearDS) | |
yy -- d2xn array or | |
xxs -- empty dxn array or view | |
M -- linear dynamic system and observation model | |
xxs -- smoothed process | |
PP -- smoother variance | |
PPpred -- smoother prediction | |
ll -- filter likelihood | |
""" | |
function kalmanrts(yy::Matrix, M::LinearDS) | |
d = size(M.Phi,1) | |
n = size(yy,2) | |
kalmanrts!(yy, zeros(d,n), zeros(d, d, n), zeros(d, d, n), M) | |
end | |
function kalmanrts!(yy, xx, PP, PPpred, M::LinearDS) # using Rauch-Tung-Striebel | |
# forward pass | |
# in place! xx = xxf = xxs | |
# PP = PPf = PPs | |
xx, PP, PPpred, ll = kalmanfilter!(yy, xx, PP, PPpred, M::LinearDS) | |
# backwards pass | |
H, Phi, b = M.H, M.Phi, M.b | |
d2, d = size(M.H) | |
assert(ndims(yy) == 2) | |
n = size(yy,2) | |
#start with xf, P from forward pass | |
xs = xx[:, n] | |
Ps = PP[:, :, n] | |
for i in n-1:-1:1 | |
xs, Ps, J = smoother_kernel(xs, Ps, xx[:, i], PP[:, :, i], PPpred[:, :, i+1], Phi, b) | |
xx[:, i] = xs | |
PP[:, :, i] = Ps | |
end | |
xx, PP, PPpred, ll | |
end | |
""" | |
Stack Kalman filter | |
ll, X = kalmanfilter{T}(Y::Array{T,3}, M::LinearDS) | |
Y -- array of m independent processes (dxnxm array) | |
X -- filtered processes (dxnxm array) | |
ll -- product marginal log likelihood | |
""" | |
function kalmanfilter{T}(Y::Array{T,3}, M::LinearDS) | |
d2, n, m= size(Y) | |
d = length(M.x0) | |
X = zeros(d,n,m) | |
PP = zeros(d, d, n) | |
PPpred = zeros(d, d, n) | |
ll = 0. | |
for j in 1:m | |
ll += kalmanfilter!(sub(Y, :, :, j), sub(X, :, :, j), PP, PPpred, M)[4] | |
end | |
ll, X | |
end | |
""" | |
Stack Rauch-Tung-Striebel smoother | |
X = kalmanrts{T}(Y::Array{T,3}, M::LinearDS) | |
Y -- m independent processes (d2xnxm array) | |
X -- smoothed processes (dxnxm array) | |
""" | |
function kalmanrts{T}(Y::Array{T,3}, M::LinearDS) | |
d2, n, m= size(Y) | |
d = length(M.x0) | |
X = zeros(d,n,m) | |
PP = zeros(d, d, n) | |
PPpred = zeros(d, d, n) | |
ll = 0. | |
for j in 1:m | |
ll += kalmanrts!(sub(Y, :, :, j), sub(X, :, :, j), PP, PPpred, M)[4] | |
end | |
ll, X | |
end | |
""" | |
EM procedure for the state space model (Shumway, Stoffer) | |
xxhat, Mhat = kalmanEM(yy, M1::LinearDS, maxit = 500, tol = 0.001) | |
yy -- data | |
M1 -- initial model | |
maxit -- maximum number of iterations | |
tol -- rel. tolerance for test for convergence of marginal likelihood | |
xxhat, Mhat -- smoothed process with estimated model | |
""" | |
function kalmanEM(yy, M::LinearDS, maxit = 500, tol = 0.001) | |
H, Phi, b, Q, R = M.H, M.Phi, M.b, M.Q, M.R | |
d2, d = size(M.H) | |
assert(ndims(yy) == 2) | |
assert(norm(b) == 0) | |
n = size(yy,2) | |
x0 = M.x0 | |
P0 = M.P0 | |
xxs = xxf = zeros(d, n) # #x(f/s)[k] predited and corrected (and smoothed) | |
PPf = zeros(d, d, n) #P[k,k] | |
PPpred = zeros(d, d, n) #P[k, k-1] | |
PPs = zeros(d, d, n) #P[k,k] | |
PPcs = zeros(d, d, n) #P[k-1,k] | |
K = zeros(d, d) | |
cvg = 1.+tol | |
nll = NaN | |
for k in 1:maxit | |
xf = x0 | |
Pf = P0 | |
# E step, forward pass | |
nllold = nll | |
nll = 0. | |
for i in 1:n | |
xf, Pf, Ppred, l, K = kalman_kernel(xf, yy[:, i], Pf, H, Phi, b, Q, R) | |
xxf[:, i], PPf[:, :, i], PPpred[:, :, i] = xf, Pf, Ppred | |
nll -= l | |
end | |
if k > 1 | |
cvg = (nllold-nll)/abs(nllold) | |
cvg < 0 && warn("Likelihood decreasing") | |
if abs(cvg) < tol | |
# println([nll, cvg]) | |
break | |
end | |
end | |
# E step, backwards pass including PPcs | |
#start with xf, Pf from forward pass | |
xs = xf | |
Ps = Pf | |
PPcs[:, :, n] = (I - K*H)*Phi*PPf[:, :, n-1] | |
J = PPf[:, :, n]*Phi'/(Phi*Ps*Phi' + Q) | |
PPs[:, :, n] = Ps | |
for i in n-1:-1:1 | |
Jprev = J | |
xs, Ps, J = smoother_kernel(xs, Ps, xxf[:, i], PPf[:, :, i], PPpred[:, :, i+1], Phi, b) | |
xxs[:, i], PPs[:, :, i] = xs, Ps | |
j = i + 2 #i = j - 2 as J = J(i) | |
if 2 < j <= n | |
Jjm2 = J | |
Jjm1 = Jprev | |
PPcs[:, :, j-1] = PPf[:, :, j-1]*Jjm2' + Jjm1*(PPcs[:, :, j] - Phi*PPf[:, :, j-1])*Jjm2' | |
#Pcs[,,j-1]=Pf[,,j-1]%*%t(J[,,j-2])+ J[,,j-1]%*%(Pcs[,,j]-Phi%*%Pf[,,j-1])%*%t(J[,,j-2])} | |
end | |
end | |
x0, P0, J0 = smoother_kernel(xs, Ps, x0, P0, PPpred[:, :, 1], Phi, b) | |
PPcs[:, :, 1] = PPf[:, :, 1]*J0' + J*(PPcs[:, :, 2] - Phi*PPf[:, :, 1])*J0' | |
# M step | |
A11 = xxs[:, 1]*xxs[:, 1]' + PPs[:, :, 1] | |
A10 = xxs[:, 1]*x0' + PPcs[:, :, 1] | |
A00 = x0*x0' + P0 | |
u = yy[:,1]-H*xxs[:,1] | |
R = u*u' + H*PPs[:, :, 1]*H' | |
for i in 1:n-1 | |
A11 = A11 + xxs[:, i+1]*xxs[:, i+1]' + PPs[:, :, i+1] | |
A10 = A10 + xxs[:, i+1]*xxs[:, i]' + PPcs[:, :, i+1] | |
A00 = A00 + xxs[:, i]*xxs[:, i]' + PPs[:, :, i] | |
u = yy[:,i+1]-H*xxs[:,i+1] | |
R = R + u * u' + H*PPs[:, :, i+1]*H' | |
end | |
Phi = A10/A00 | |
Q = (A11 - Phi*A10')/n | |
Q = (Q+Q')/2 | |
R = R/n | |
# println("PARAM $k: nll", [nll, cvg], "\n x0 $x0\n P0 $P0\n R $R\n Phi $Phi\n Q $Q") | |
end | |
xxs, LinearDS(x0, P0, Phi, b, Q, H, R) | |
end | |
end #module | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment