Skip to content

Instantly share code, notes, and snippets.

@s3i7h
Created January 8, 2022 16:37
Show Gist options
  • Save s3i7h/f62e960403d4cc4ebb1226f711574f60 to your computer and use it in GitHub Desktop.
Save s3i7h/f62e960403d4cc4ebb1226f711574f60 to your computer and use it in GitHub Desktop.
A sample implementation of the Reservoir Sampling algorithm (might be wrong)
from typing import Iterable, TypeVar, List
from collections import deque
from itertools import islice
import math
import random
T = TypeVar("T")
def consume(it: Iterable, k: int = None):
if k is None:
deque(it, maxlen=0)
else:
next(islice(it, k, k), None)
def reservoir_sample(population: Iterable[T], k) -> List[T]:
pop_iter = iter(population)
reservoir = []
for _, item in zip(range(k), pop_iter):
reservoir.append(item)
w = 1
exhast_marker = object()
while True:
w *= math.exp(math.log(random.random())/k)
skip = math.floor(
math.log(random.random())/
math.log(1-w)
)
consume(pop_iter, skip)
item = next(pop_iter, exhast_marker)
if item is exhast_marker:
break
reservoir[random.randrange(k)] = item
return reservoir
if __name__ == "__main__":
print(reservoir_sample(range(6), 3))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment