Last active
April 20, 2016 10:14
-
-
Save barusan/4b799594a510fbf9550b052421d524b7 to your computer and use it in GitHub Desktop.
experiment of unbiased variance over normally-distributed data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
#-*- coding: utf-8 -*- | |
import math | |
import numpy as np | |
import matplotlib.pyplot as plt | |
MEAN = 0.0 | |
VARIANCE = 1.0 | |
NTRIAL = 1000 | |
Ns = [2, 5, 10, 20, 50, 100, 200, 500, 1000] | |
sample_vars = [] | |
unbiased_vars = [] | |
for N in Ns: | |
svars = [] | |
uvars = [] | |
for trial in range(NTRIAL): | |
data = np.random.normal(MEAN, math.sqrt(VARIANCE), N) | |
svar = np.var(data, ddof=0) # sample variance | |
uvar = np.var(data, ddof=1) # unbiased variance | |
svars.append(svar) | |
uvars.append(uvar) | |
sample_vars.append(np.mean(svars)) | |
unbiased_vars.append(np.mean(uvars)) | |
plt.plot([0, 2000], [1.0, 1.0], "g-", label="ground truth") | |
X = np.arange(1, 2000, 1) | |
plt.plot(X, map(lambda x: float(x-1)/float(x), X), "y--", label="$(N-1)/N$") | |
plt.plot(Ns, sample_vars, "bs", ms=7, label="sample variance") | |
plt.plot(Ns, unbiased_vars, "ro", ms=7, label="unbiased variance") | |
plt.xscale("log") | |
plt.xlim(xmax=2000) | |
plt.ylim(ymax=1.5) | |
plt.xlabel("sample size") | |
plt.ylabel("average of estimated variance over %d trials" % NTRIAL) | |
plt.legend() | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment