Skip to content

Instantly share code, notes, and snippets.

@pwolfram
Forked from nkeim/fast_hist.py
Last active August 29, 2015 14:24
Show Gist options
  • Save pwolfram/87ce2fa5186b5df40a4b to your computer and use it in GitHub Desktop.
Save pwolfram/87ce2fa5186b5df40a4b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import numpy as np
def fast_hist(data, bin_edges):
"""Fast 1-dimensional histogram. Comparable to numpy.histogram(), but careless.
'bin_edges' should encompass all values in 'data'; the first and last elements
in 'bin_edges' are ignored, and are effectively (-infinity, infinity).
Returns the histogram array only.
"""
# Yes, I've tested this against histogram().
return np.bincount(np.digitize(data, bin_edges[1:-1]), minlength=len(bin_edges) - 1)
def fast_hist_2d(data, bin_edges):
"""Fast 2-dimensional histogram. Comparable to numpy.histogramdd(), but careless.
'data' is an Nx2 array. 'bin_edges' is used for both dimensions and should
encompass all values in 'data'; the first and last elements in 'bin_edges'
are ignored, and are effectively (-infinity, infinity).
Returns the histogram array only.
"""
# Yes, I've tested this against histogramdd().
xassign = np.digitize(data[:,0], bin_edges[1:-1])
yassign = np.digitize(data[:,1], bin_edges[1:-1])
nbins = len(bin_edges) - 1
flatcount = np.bincount(xassign + yassign * nbins, minlength=nbins*nbins)
return flatcount.reshape((nbins, nbins)).T
def fast_hist_weighted(data, weights, bin_edges):
"""Fast 1-dimensional histogram. Comparable to numpy.histogram(), but careless.
'bin_edges' should encompass all values in 'data'; the first and last elements
in 'bin_edges' are ignored, and are effectively (-infinity, infinity).
Returns the histogram array only with weighting. Non-weighted version at https://gist.github.com/nkeim/4455635
"""
return np.bincount(np.digitize(data, bin_edges[1:-1]), minlength=len(bin_edges) - 1, weights=weights)
def test_hist():
print 'Note: bins must contain the whole range of the data for fast and regular histograms to be equivalent!'
a = np.random.randn(1e7)
b = np.random.randn(1e7)
lvec = np.array([-1000, 0,1,4,16, 1000])
pdf,_ = np.histogram(a,bins=lvec,weights=b,density=False)
pdfnew = fast_hist_weighted(a,b,lvec)
print 'Error between weighted fast histograms:'
print np.max(np.abs(pdf-pdfnew))
print np.mean(np.sqrt((pdf-pdfnew)**2.0))
pdf,_ = np.histogram(a,bins=lvec,density=False)
pdfnew = fast_hist(a,lvec)
print 'Error between fast histograms:'
print np.max(np.abs(pdf-pdfnew))
print np.mean(np.sqrt((pdf-pdfnew)**2.0))
if __name__ == "__main__":
test_hist()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment