Skip to content

Instantly share code, notes, and snippets.

@bthirion
Forked from jakevdp/README.rst
Created December 30, 2011 11:10
Show Gist options
  • Save bthirion/1539342 to your computer and use it in GitHub Desktop.
Save bthirion/1539342 to your computer and use it in GitHub Desktop.
GMM BIC/AIC test

This includes a test of the new GMM routines in https://github.com/bthirion/scikit-learn/tree/gmm-fixes

By changing the line

GMM = mixture.GMM

at the top of the file, we can plot the BIC and AIC for each variant of GMM. Standard GMM works beautifully: it settles on 3 components, which describe the data well. DPGMM and VBGMM produce some unexpected results.

import pylab as pl
import numpy as np
from sklearn import mixture
# Model class under test. Swap the active line to compare the variants.
GMM = mixture.GMM
#GMM = mixture.DPGMM
#GMM = mixture.VBGMM

# ``GMM`` is a class, not an instance, so the variant check has to be
# ``issubclass``; ``isinstance(GMM, ...)`` is always False. The answer does
# not change between iterations, so it is computed once here.
is_variational = issubclass(GMM, (mixture.VBGMM, mixture.DPGMM))

# make some data: three 2-D blobs, ``Npts`` points each
Npts = 100
np.random.seed(0)
centers = np.array([[0.25, 0.75],
                    [0.75, 0.75],
                    [0.75, 0.25]])
covars = 0.1  # passed to np.random.normal as ``scale`` (a standard deviation)
X = np.random.normal(centers, covars, size=(Npts, 3, 2)) * 10
X = X.reshape(-1, 2)

# After the reshape the rows cycle through the centers 0, 1, 2, 0, 1, 2, ...
# so the colour index follows the same repeating pattern.
colors = (np.ones((Npts, 1)) * np.arange(3)).reshape(-1)

pl.figure()
pl.scatter(X[:, 0], X[:, 1], c=colors, s=16, lw=0)
pl.title('input data')

# Fit one model per candidate number of components and record both criteria.
n_components = np.arange(1, 16)
BIC = np.zeros(n_components.shape)
AIC = np.zeros(n_components.shape)

for i, n in enumerate(n_components):
    clf = GMM(n_components=n,
              covariance_type='diag')
    clf.fit(X)

    if is_variational:
        # No aic()/bic() is used for the variational models; fall back to
        # the unpenalised -2 * log-likelihood as a stand-in for both curves.
        deviance = - 2 * clf.eval(X)[0].sum()
        AIC[i] = deviance
        BIC[i] = deviance
    else:
        # AIC was previously never filled in and was plotted as all zeros.
        AIC[i] = clf.aic(X)
        BIC[i] = clf.bic(X)

pl.figure()
pl.plot(n_components, AIC, label='AIC')
pl.plot(n_components, BIC, label='BIC')
pl.legend(loc=0)
pl.xlabel('n_components')
pl.ylabel('AIC / BIC')

# Refit with the BIC-optimal number of components, using the same
# covariance structure as the models that were compared above.
i_n = np.argmin(BIC)
clf = GMM(n_components=n_components[i_n],
          covariance_type='diag')
clf.fit(X)
label = clf.predict(X)

pl.figure()
pl.scatter(X[:, 0], X[:, 1], c=label, s=16, lw=0)
pl.title('classification at min(BIC)')
pl.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment