Skip to content

Instantly share code, notes, and snippets.

@astrofrog
Created May 11, 2020 16:41
Show Gist options
  • Save astrofrog/edafe936b54ffec9917e5573737f09a3 to your computer and use it in GitHub Desktop.
Save astrofrog/edafe936b54ffec9917e5573737f09a3 to your computer and use it in GitHub Desktop.
def sample_dask_array_chunks(array, n_chunks):
    """
    Return a 1-d array with the values of randomly sampled chunks of an array.

    Chunks are drawn uniformly at random *without replacement*, so a given
    chunk never contributes its values more than once.

    Parameters
    ----------
    array : dask array
        The chunked array to sample from. Only its ``chunks``, ``numblocks``
        and ``dtype`` attributes are used, together with slicing followed by
        ``.compute()``.
    n_chunks : int
        Number of chunks to sample. If this exceeds the number of chunks in
        ``array``, every chunk is returned exactly once (in random order).

    Returns
    -------
    numpy.ndarray
        The flattened values of the sampled chunks, concatenated into a
        single 1-d array. Empty (with the dtype of ``array``) if no chunks
        are sampled.

    Raises
    ------
    ValueError
        If ``n_chunks`` is negative.
    """
    # Total number of chunks over all dimensions (1 for a 0-d array).
    n_total = int(np.prod(array.numblocks, dtype=int))

    # Sampling without replacement cannot yield more chunks than exist.
    n_sample = min(n_chunks, n_total)
    if n_sample == 0:
        # np.hstack refuses an empty list, so short-circuit here.
        return np.array([], dtype=array.dtype)

    # Draw distinct flat chunk indices; drawing each dimension independently
    # could pick the same chunk several times.
    flat_indices = np.random.choice(n_total, size=n_sample, replace=False)

    # Boundaries of the chunks along each dimension.
    edges = [np.concatenate(([0], np.cumsum(sizes))) for sizes in array.chunks]

    data = []
    for flat_index in flat_indices:
        # Convert the flat chunk index into one chunk index per dimension.
        block = np.unravel_index(flat_index, array.numblocks)
        slices = tuple(
            slice(edges[idim][iblock], edges[idim][iblock + 1])
            for idim, iblock in enumerate(block)
        )
        data.append(array[slices].compute().ravel())
    return np.hstack(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment