Last active
August 23, 2020 17:20
-
-
Save MichaelSnowden/8c7b0ba0c4963fec6911062a61e5a311 to your computer and use it in GitHub Desktop.
Reservoir Sampling in Elm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module NonEmpty exposing (..) | |
type alias NonEmpty a = | |
{ head : a, tail : List a } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module ReservoirSampling exposing (sampleList, sampleStream) | |
import NonEmpty exposing (NonEmpty) | |
import Random | |
sampleStream : Int -> a -> a -> Random.Generator a | |
sampleStream numSeen old new = | |
let | |
gotRandomInt k = | |
let | |
replace = | |
k == 0 | |
chosen = | |
if replace then | |
new | |
else | |
old | |
in | |
chosen | |
in | |
Random.map gotRandomInt (Random.int 0 (numSeen + 1)) | |
sampleListAfter : Int -> NonEmpty a -> Random.Generator a | |
sampleListAfter numSeen { head, tail } = | |
case tail of | |
[] -> | |
Random.constant head | |
nextHead :: nextTail -> | |
let | |
gotSample sample = | |
sampleListAfter (numSeen + 1) { head = sample, tail = nextTail } | |
in | |
Random.andThen gotSample (sampleStream numSeen head nextHead) | |
sampleList : NonEmpty a -> Random.Generator a | |
sampleList = | |
sampleListAfter 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module ReservoirSamplingTest exposing (..) | |
import Dict exposing (Dict) | |
import Expect | |
import NonEmpty exposing (NonEmpty) | |
import Random | |
import ReservoirSampling | |
import Test exposing (Test) | |
exampleTest : Test | |
exampleTest = | |
let | |
input = | |
{ head = 1, tail = [ 2, 3 ] } | |
( actual1, _ ) = | |
Random.step (ReservoirSampling.sampleList input) (Random.initialSeed 0) | |
( actual2, _ ) = | |
Random.step (ReservoirSampling.sampleList input) (Random.initialSeed 3) | |
( actual3, _ ) = | |
Random.step (ReservoirSampling.sampleList input) (Random.initialSeed 1) | |
in | |
Test.describe "exampleTest" | |
[ Test.test "sample1" <| always <| Expect.equal actual1 1 | |
, Test.test "sample2" <| always <| Expect.equal actual2 2 | |
, Test.test "sample3" <| always <| Expect.equal actual3 3 | |
] | |
generateSample : Int -> Random.Generator Int | |
generateSample size = | |
let | |
head = | |
0 | |
tail = | |
List.range 1 (size - 1) | |
list = | |
{ head = head, tail = tail } | |
in | |
ReservoirSampling.sampleList list | |
generateSampleCounts : Int -> Int -> Dict Int Int -> Random.Generator (Dict Int Int) | |
generateSampleCounts numSamples numBuckets counts = | |
if numSamples == 0 then | |
Random.constant counts | |
else | |
let | |
gotFirstSample sample = | |
let | |
updateCount maybeCount = | |
case maybeCount of | |
Just count -> | |
Just <| count + 1 | |
Nothing -> | |
Just 1 | |
newCounts = | |
Dict.update sample updateCount counts | |
in | |
generateSampleCounts (numSamples - 1) numBuckets newCounts | |
in | |
generateSample numBuckets |> Random.andThen gotFirstSample | |
-- Generate a bunch of lists and verify that the sampler samples uniformly from them | |
randomTest : Test | |
randomTest = | |
let | |
numSamples = | |
10000 | |
numBuckets = | |
10 | |
( counts, _ ) = | |
Random.step (generateSampleCounts numSamples numBuckets Dict.empty) (Random.initialSeed 42) | |
uniformDistribution = | |
1.0 / numBuckets | |
actualDistribution count = | |
toFloat count / numSamples | |
toTest : ( Int, Int ) -> Test | |
toTest ( bucket, count ) = | |
Test.test ("bucket " ++ String.fromInt bucket ++ " should have proportion ~ 1 / numBuckets") <| | |
\_ -> | |
Expect.within (Expect.Relative 0.1) uniformDistribution (actualDistribution count) | |
tests : List Test | |
tests = | |
Dict.toList counts |> List.map toTest | |
in | |
Test.describe "randomTest" | |
[ Test.test ("should have " ++ String.fromInt numBuckets ++ " tests") <| \_ -> Expect.equal numBuckets (List.length tests) | |
, Test.describe "should be close to uniform" tests | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment