Created
April 3, 2016 07:58
-
-
Save barusan/6d68b0abce47e1b519ab232725d90718 to your computer and use it in GitHub Desktop.
experiment of unbiased variance over normally-distributed data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/Rscript | |
# run on Mac OS X, homebrew R | |
library(ggplot2) | |
# sample sizes | |
sizes <- c(2, 5, 10, 20, 50, 100, 200, 500, 1000) | |
# return (x - 1) / x | |
ratio <- function(x) { (x - 1) / x } | |
# sample variance | |
variance <- function(x) { var(x) * ratio(length(x)) } | |
# data generation: dataSize samples x N trials | |
N <- 1000 | |
genData <- function(dataSize) { | |
lapply(1:N, function(x) { rnorm(dataSize, mean=0.0, sd=1.0) }) | |
} | |
# trial & evaluation | |
data <- lapply(sizes, genData) | |
sampleVars <- lapply(data, function(x) { sapply(x, variance) }) | |
unbiasVars <- lapply(data, function(x) { sapply(x, var) }) | |
sampleVarsMeans <- sapply(sampleVars, mean) | |
unbiasVarsMeans <- sapply(unbiasVars, mean) | |
# plot | |
quartz() | |
plot(sizes, sampleVarsMeans, log="x", ylim=c(0, 1.5), | |
col="blue", type="p", pch=15, | |
xlab="sample size", | |
ylab=sprintf("average of estimated variance over %d trials", N)) | |
points(sizes, unbiasVarsMeans, | |
col="red", type="p", pch=16) | |
abline(1, 0, col="green4") # ground truth | |
lines(1:2000, sapply(1:2000, ratio), col="yellow4", lty=2) # (N-1)/N | |
legend("topright", | |
c("sample variance", "unbiased variance", | |
"ground truth", "(N - 1) / N"), | |
col=c("blue","red","green4","yellow4"), | |
lty=c(NA,NA,1,2), lwd=c(NA,NA,1,1), pch=c(15,16,NA,NA)) | |
locator() # do not close the window automatically | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment