Last active
November 15, 2017 03:17
-
-
Save cynthia/4fc0fcf8bb97aee4487d05704a5ee4b4 to your computer and use it in GitHub Desktop.
A experimental Mahalanobis whitening (ZCA) implementation using covariance approximation with FastMCD. I don't know if this is mathematically correct or whether if this works or not. Use with caution.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import numpy | |
from sklearn.covariance import MinCovDet | |
class FastMCDZCA(): | |
""" | |
An implementation of Mahalanobis whitening (ZCA) using FastMCD. This | |
implementation has a couple questionable pseudoscience bits that I haven't | |
fully proved why it works - but anyhow. | |
https://en.wikipedia.org/wiki/Whitening_transformation | |
References | |
---------- | |
.. [Rouseeuw1984] `P. J. Rousseeuw. Least median of squares regression. | |
J. Am Stat Ass, 79:871, 1984.` | |
.. [Rousseeuw] `A Fast Algorithm for the Minimum Covariance Determinant | |
Estimator, 1999, American Statistical Association and the American | |
Society for Quality, TECHNOMETRICS` | |
.. [ButlerDavies] `R. W. Butler, P. L. Davies and M. Jhun, | |
Asymptotics For The Minimum Covariance Determinant Estimator, | |
The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400` | |
""" | |
def __init__(self, store_precision=True, assume_centered=False, support_fraction=None, random_state=None): | |
self.mcd_ = MinCovDet(store_precision=store_precision, assume_centered=assume_centered, | |
support_fraction=support_fraction, random_state=random_state) | |
self.U_ = None | |
self.S_ = None | |
self.V_ = None | |
self.epsilon = numpy.finfo(numpy.float).eps | |
self.zca_ = None | |
def correct_covariance(self, data): | |
return self.mcd_(data) | |
def error_norm(self, comp_cov, norm='frobenius', scaling=True, squared=True): | |
return self.mcd_.error_norm(comp_cov, norm=norm, scaling=scaling, squared=squared) | |
def fit(self, X, y=None): | |
ret = self.mcd_.fit(X, y=y) | |
self.U_, self.S_, self.V_ = numpy.linalg.svd(self.mcd_.covariance_) | |
self.zca_ = numpy.dot(self.U_, numpy.dot(numpy.diag(1.0 / numpy.sqrt(self.S_ + self.epsilon)), self.U_.T)) | |
return ret | |
def transform(self, X): | |
if self.U_ is None or self.S_ is None or self.V_ is None or self.zca_ is None: | |
raise RuntimeError("fit() must be called first") | |
return numpy.dot(self.zca_, X) | |
def fit_transform(self, X, y=None): | |
self.fit(X, y=y) | |
return self.transform(X) | |
def get_params(self): | |
raise RuntimeError("Not implemented yet") | |
def get_precision(self): | |
return self.mcd_.get_precision() | |
def mahalanobis(self, observations): | |
return self.mcd_.mahalanobis(observations) | |
def reweight_covariance(self, data): | |
return self.mcd_.reweight_covariance(data) | |
def score(self, X_test, y=None): | |
return self.mcd_.score(X_test, y=y) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment