Last active
March 3, 2023 16:45
-
-
Save HarryMcCarney/5348ff466ff938ff1e04b7288936cf02 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Think Bayes - Allen B. Downey | |
//Chapter 1 - Probability | |
#r "nuget: FSharp.Stats, 0.4.12-preview.1" | |
#r "nuget: fsharp.data" | |
#r "nuget: Plotly.NET" | |
open FSharp.Data | |
open FSharp.Stats | |
open FSharp.Stats.Distributions | |
open Plotly.NET | |
[<Literal>] | |
let file = | |
"https://raw.githubusercontent.com/AllenDowney/ThinkBayes2/master/data/gss_bayes.csv" | |
type Data = CsvProvider<file, HasHeaders=true> | |
let data = Data.Load(file) | |
let bankers = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Indus10 = 6870)) | |
let females = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Sex = 2)) | |
let liberals = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Polviews <= 3)) | |
let democrats = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Partyid <= 1)) | |
let prob s = | |
s |> Seq.averageBy (fun (c, b) -> if b = true then 1. else 0) | |
let conj p1 p2 = | |
p1 |> Seq.zip p2 |> Seq.map (fun (b, d) -> fst b, (snd b) && (snd d)) | |
let cond p given = | |
given | |
|> Seq.zip p | |
|> Seq.filter (fun (p, g) -> snd g) | |
|> Seq.map (fun (p, g) -> p) | |
bankers |> prob | |
females |> prob | |
liberals |> prob | |
democrats |> prob | |
conj democrats bankers |> prob | |
cond females bankers |> prob | |
cond liberals females |> prob | |
conj liberals democrats |> cond females |> prob | |
conj liberals females |> cond bankers |> prob | |
// Excercises | |
//1.1 | |
conj females bankers |> conj liberals |> conj democrats |> prob | |
//1.2 | |
cond liberals democrats |> prob | |
cond democrats liberals |> prob | |
//1.3 | |
let young = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Age < 30)) | |
let old = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Age >= 65)) | |
let conservative = data.Rows |> Seq.map (fun r -> r.Caseid, (r.Polviews >= 5)) | |
young |> prob | |
old |> prob | |
conservative |> prob | |
conj young liberals |> prob | |
cond liberals young |> prob | |
cond old conservative |> prob | |
cond conservative old |> prob | |
cond old conservative |> prob | |
//Chapter 2 - Bayes Theorem | |
type Prior = | |
{ Hypothesis: string | |
Prior: float | |
Likelihood: float } | |
type Posterior = | |
{ Hypothesis: string | |
Prior: float | |
Likelihood: float | |
Posterior: float } | |
let calcPosteriors (priors: Prior list) : Posterior list = | |
let totalProbability = priors |> List.sumBy (fun r -> r.Prior * r.Likelihood) | |
priors | |
|> List.map (fun h -> | |
{ Hypothesis = h.Hypothesis | |
Prior = h.Prior | |
Likelihood = h.Likelihood | |
Posterior = ((h.Prior * h.Likelihood) / totalProbability) }) | |
// Probability of Vanilla | |
let priors = | |
[ { Hypothesis = "Bowl 1" | |
Prior = 0.5 | |
Likelihood = 0.75 } | |
{ Hypothesis = "Bowl 2" | |
Prior = 0.5 | |
Likelihood = 0.50 } ] | |
priors |> calcPosteriors | |
// Dice Problem | |
let diceTable = | |
[ { Hypothesis = "6 Sided" | |
Prior = 0.3333333333 | |
Likelihood = 0.1666666667 } | |
{ Hypothesis = "8 Sided" | |
Prior = 0.3333333333 | |
Likelihood = 0.125 } | |
{ Hypothesis = "12 Sided" | |
Prior = 0.3333333333 | |
Likelihood = 0.08333333333 } ] | |
diceTable |> calcPosteriors | |
// Monty Hall Problem | |
let montyTable = | |
[ // you first choose door one and monty has opened door three to reveal a goat | |
{ Hypothesis = "Door 1" | |
Prior = 0.3333333333 | |
Likelihood = 0.5 // if it were behind door 1 there a 50/50 that month would open door 3 | |
} | |
{ Hypothesis = "Door 2" | |
Prior = 0.3333333333 | |
Likelihood = 1 //if it were behind door 2 monty has to open door 3 a we have chosen door 1 | |
} // likelihood that monthy opens door 3 | |
{ Hypothesis = "Door 3" | |
Prior = 0.3333333333 | |
Likelihood = 0 // cant be here as month has opened door 3 | |
} ] | |
montyTable | |
|> calcPosteriors | |
//Exercises | |
// 2.1 | |
[ { Hypothesis = "! Head" | |
Prior = 0.5 | |
Likelihood = 0.5 } | |
{ Hypothesis = "2 Head" | |
Prior = 0.5 | |
Likelihood = 1. } ] | |
|> calcPosteriors | |
//2.2 | |
[ { Hypothesis = "both girls" | |
Prior = 0.25 | |
Likelihood = 1. } | |
{ Hypothesis = "both boys" | |
Prior = 0.25 | |
Likelihood = 0. } | |
{ Hypothesis = "1st is girl 2nd is boy" | |
Prior = 0.25 | |
Likelihood = 0.5 } | |
{ Hypothesis = "1st is boy 2nd is girl" | |
Prior = 0.25 | |
Likelihood = 0.5 } ] | |
|> calcPosteriors | |
//2.3 | |
let montyDoor2 = | |
[ { Hypothesis = "Door 1" | |
Prior = 0.3333333333 | |
Likelihood = 0.5 } | |
{ Hypothesis = "Door 2" | |
Prior = 0.3333333333 | |
Likelihood = 1 } | |
{ Hypothesis = "Door 3" | |
Prior = 0.3333333333 | |
Likelihood = 0 } ] | |
let montyDoor2 = | |
[ { Hypothesis = "Door 1" | |
Prior = 0.3333333333 | |
Likelihood = 0.5 } | |
{ Hypothesis = "Door 2" | |
Prior = 0.3333333333 | |
Likelihood = 0 } | |
{ Hypothesis = "Door 3" | |
Prior = 0.3333333333 | |
Likelihood = 0.5 } ] | |
montyDoor2 |> calcPosteriors | |
let montyDoor3 = | |
[ { Hypothesis = "Door 1" | |
Prior = 0.3333333333 | |
Likelihood = 0 } | |
{ Hypothesis = "Door 2" | |
Prior = 0.3333333333 | |
Likelihood = 1 } | |
{ Hypothesis = "Door 3" | |
Prior = 0.3333333333 | |
Likelihood = 0 } ] | |
montyDoor3 |> calcPosteriors | |
//2.4 | |
let MnMs = | |
[ { Hypothesis = "Bag 1" | |
Prior = 0.50 | |
Likelihood = 0.2 } | |
{ Hypothesis = "Bag 2" | |
Prior = 0.50 | |
Likelihood = 0.13 } ] | |
|> calcPosteriors | |
//Chapter 3 - Distributions | |
let getProbabilities (probs: 'a array) (map: Map<'a, float>) : float array = probs |> Array.map (fun p -> map[p]) | |
let categories = [ 1; 2; 3; 4; 5; 6 ] |> Set.ofSeq | |
let die = | |
EmpiricalDistribution.createNominal (Template = categories) [ 1; 2; 3; 3; 6; 6 ] | |
die[4] | |
let lettersOld = | |
"mississippi".ToCharArray() | |
|> Array.map string | |
|> Array.toList | |
|> Frequency.createGeneric | |
|> Empirical.ofHistogram | |
lettersOld["i"] | |
let myAlphabetNum = | |
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" |> Set.ofSeq | |
let letters = | |
EmpiricalDistribution.createNominal (Template = myAlphabetNum) "mississippi" | |
getProbabilities [| 1; 4; 6 |] die | |
letters.['i'] | |
letters.['z'] | |
//Cookie problem revisited | |
let priorDist = EmpiricalDistribution.createNominal () [ "Bowl 1"; "Bowl 2" ] | |
let likelihoodVanilla = [ "Bowl 1", 0.75; "Bowl 2", 0.5 ] |> Map.ofSeq | |
let likelihoodChocolate = [ "Bowl 1", 0.25; "Bowl 2", 0.5 ] |> Map.ofSeq | |
let normalise (dist: seq<'a * float>) = | |
let totalProbability = dist |> Seq.sumBy snd | |
dist |> Seq.map (fun (k, v) -> k, v / totalProbability) |> Map.ofSeq | |
let updatePosteriorDist (likelihoods: Map<'a, float>) (priorDist: Map<'a, float>) = | |
priorDist | |
|> Map.toSeq | |
|> Seq.map (fun (k, v) -> | |
match (likelihoods.TryFind k) with | |
| Some l -> (k, v * l) | |
| None -> (k, v)) | |
|> normalise | |
updatePosteriorDist likelihoodVanilla priorDist | |
|> updatePosteriorDist likelihoodVanilla | |
|> updatePosteriorDist likelihoodChocolate | |
//101 Bowls | |
let drawChart priorDist posteriorDist vanillas chocolates = | |
let posteriorAfterOneVanillaLine = | |
Chart.Line((posteriorDist |> Map.toSeq), Name = "Posterior") | |
let prior101DistLine = Chart.Line((priorDist |> Map.toSeq), Name = "Prior") | |
let desc = | |
ChartDescription.create | |
(sprintf "Posterior after %i vanilla cookies and %i chocolate cookies" vanillas chocolates) | |
"" | |
[ posteriorAfterOneVanillaLine; prior101DistLine ] | |
|> Chart.combine | |
|> Chart.withXAxisStyle ("Bowl") | |
|> Chart.withYAxisStyle ("PMF") | |
|> Chart.withDescription (desc) | |
|> Chart.show | |
let prior101Dist = EmpiricalDistribution.createNominal () ([ 0..100 ] |> Seq.ofList) | |
let likelihoodVanilla = | |
[ 0..100 ] |> List.map (fun i -> i, (float i / 100.)) |> Map.ofList | |
let likelihoodChocolate = | |
[ 0..100 ] |> List.map (fun i -> i, 1. - (float i / 100.)) |> Map.ofList | |
let posterior2 = | |
updatePosteriorDist likelihoodVanilla prior101Dist | |
|> updatePosteriorDist likelihoodVanilla | |
drawChart prior101Dist posterior2 2 0 | |
let posterior3 = | |
updatePosteriorDist likelihoodVanilla prior101Dist | |
|> updatePosteriorDist likelihoodVanilla | |
|> updatePosteriorDist likelihoodChocolate | |
drawChart prior101Dist posterior3 2 1 | |
let posterior3 = | |
updatePosteriorDist likelihoodVanilla prior101Dist | |
|> updatePosteriorDist likelihoodVanilla | |
|> updatePosteriorDist likelihoodChocolate | |
|> Empirical.maxLike | |
//The dice problem | |
let priorDiceDist = EmpiricalDistribution.createNominal () ([ 6; 8; 12 ]) | |
let diceLikelihoods1 = [ 6, (1. / 6.); 8, (1. / 8.); 12, (1. / 12.) ] |> Map.ofList | |
let diceLikelihoods7 = [ 6, 0.; 8, (1. / 8.); 12, (1. / 12.) ] |> Map.ofList | |
let postDice = | |
updatePosteriorDist diceLikelihoods1 priorDiceDist | |
|> updatePosteriorDist diceLikelihoods7 | |
let updateDice data priorDist = | |
let likelihood = | |
priorDist | |
|> Map.toSeq | |
|> Seq.map (fun (k, v) -> k, (if data > k then 0. else 1. / (float k))) | |
|> Map.ofSeq | |
updatePosteriorDist likelihood priorDist | |
updateDice 7 priorDiceDist | |
// Excercises 3.1 | |
let priorDice31 = EmpiricalDistribution.createNominal () ([ 6; 8; 12 ]) | |
updateDice 1 priorDice31 |> updateDice 3 |> updateDice 5 |> updateDice 7 | |
//3.2 | |
let dice32 = | |
EmpiricalDistribution.createNominal () ([ 4; 6; 6; 8; 8; 8; 12; 12; 12; 12; 20; 20; 20; 20; 20 ]) | |
updateDice 7 dice32 | |
//3.3 | |
let priorDist = EmpiricalDistribution.createNominal () [ "Drawer 1"; "Drawer 2" ] | |
let likelihoodMatching = | |
[ "Drawer 1", (0.5 * 0.5); "Drawer 2", (0.333 * 0.333) ] |> Map.ofSeq | |
(updatePosteriorDist priorDist likelihoodMatching)["Drawer 1"] * 0.5 | |
(updatePosteriorDist priorDist likelihoodMatching)["Drawer 1"] | |
(updatePosteriorDist priorDist likelihoodMatching)["Drawer 2"] | |
// Chapter 4 - Estimating Proportions | |
let binomial = DiscreteDistribution.binomial 0.5 250 | |
let getBinomialProbs (binomialDist: DiscreteDistribution<float, int>) (data: int seq) = | |
data |> Seq.map (fun i -> i, binomialDist.PMF i) |> Map.ofSeq | |
let getBinomialProbsCDF (binomialDist: DiscreteDistribution<float, int>) (data: int seq) = | |
data |> Seq.map (fun i -> i, binomialDist.CDF i) |> Map.ofSeq | |
let prior101DistLine = | |
Chart.Line((getBinomialProbs binomial [ 0..250 ] |> Map.toSeq), Name = "n = 25, p = 0.5") | |
prior101DistLine | |
|> Chart.withXAxisStyle ("Number of Heads(k)") | |
|> Chart.withYAxisStyle ("PMF") | |
|> Chart.show | |
let highestp = (getBinomialProbs binomial [ 0..250 ]) |> Empirical.maxLike // this should really return the key as well | |
(getBinomialProbs binomial [ 0..250 ]) | |
|> Map.toSeq | |
|> Seq.filter (fun (k, v) -> v = highestp) | |
1. - (getBinomialProbsCDF binomial [ 139 ] |> Map.toSeq |> Seq.head |> snd) // p of 140 k or more - | |
//this should really be a function with greater than and less than variants. | |
//Bayesian estimation | |
let prior = [ 0.0..0.01..1 ] |> List.map (fun i -> i, 1.) |> Map.ofSeq // uniform prior meaning we have no idea how likely each proportion of heads is. | |
let likelihoodHeads = prior | |
let likelihoodTails = | |
prior | |
|> Map.map (fun k v -> | |
k | |
1. - v) | |
type FlipResult = | |
| Head | |
| Tail | |
let data = [ 1..250 ] |> List.map (fun i -> if i <= 5 then Head else Tail) | |
let updatePriorWithData prior data : Map<float, float> = | |
data | |
|> List.fold | |
(fun pmf d -> | |
match d with | |
| Head -> updatePosteriorDist likelihoodHeads pmf | |
| Tail -> updatePosteriorDist likelihoodTails pmf) | |
prior | |
let result = updatePriorWithData prior data | |
let posteriorLine = Chart.Line((result |> Map.toSeq), Name = "140 Heads out of 250") | |
posteriorLine | |
|> Chart.withXAxisStyle ("Proportion of Heads") | |
|> Chart.withYAxisStyle ("robability") | |
|> Chart.show | |
//Triangle prior | |
let triangleRes = updatePriorWithData tprior tdata // swapping priors with enough data we get same posterior result | |
let uniRes = updatePriorWithData prior tdata | |
let tprior = | |
[ 0.69 .. - 0.024 .. 0.0 ] | |
|> List.append [ 0.0..0.01..0.7 ] | |
|> List.zip [ 0.0..0.01..0.99 ] | |
|> normalise | |
open System | |
let rnd = Random() | |
let tdata = [ 1..10 ] |> List.map (fun i -> if i % 2 = 0 then Head else Tail) | |
let unfairPriorRes = updatePriorWithData tprior tdata // swapping priors with enough data we get same posterior result | |
let posteriorLine = Chart.Line((unfairPriorRes |> Map.toSeq), Name = "Posterior") | |
let priorLine = Chart.Line((tprior |> Map.toSeq), Name = "Prior") | |
[ posteriorLine; priorLine ] | |
|> Chart.combine | |
|> Chart.withXAxisStyle ("Proportion of Heads") | |
|> Chart.withYAxisStyle ("robability") | |
|> Chart.show | |
//Binomial likelihood function | |
let uniformPrior = [ 0.0..0.01..1 ] |> List.map (fun i -> i, 1.) |> Map.ofSeq | |
let likelihoods = | |
uniformPrior | |
|> Map.toSeq | |
|> Seq.map (fun (p, v) -> p, (DiscreteDistribution.binomial p 25000).PMF 14000) // creates a binomial for 100 values of p which should be more effcient than playing | |
//throuhall the data. | |
////It mean we can call updatePosteriorDist directly just once rather than via updatePriorWithData for every data point | |
|> Map.ofSeq | |
let post = updatePosteriorDist likelihoods uniformPrior |> Map.toSeq // | |
Chart.Line(post, Name = "Using binomial to update dist") | |
|> Chart.withXAxisStyle ("Proportion of Heads") | |
|> Chart.withYAxisStyle ("robability") | |
|> Chart.show | |
// Excercises | |
//4.1 | |
let linspace start stop count = | |
[| start .. ((stop - start) / (count - 1.)) .. stop |] | |
let uniformPrior = (linspace 0.1 0.4 101) |> Array.map (fun i -> i, 1.) |> normalise | |
let genLike prior n k = | |
prior | |
|> Map.toSeq | |
|> Seq.map (fun (p, v) -> p, (DiscreteDistribution.binomial p n).PMF k) | |
|> Map.ofSeq | |
let priorAfter25 = updatePosteriorDist uniformPrior (genLike uniformPrior 100 25) | |
let priorAfter28 = updatePosteriorDist uniformPrior (genLike uniformPrior 103 28) | |
let priorAfter25and33 = | |
updatePosteriorDist (priorAfter25) (genLike uniformPrior 3 3) | |
[ Chart.Line(priorAfter25 |> Map.toSeq, Name = "Probability of hit after 25/100") | |
Chart.Line(priorAfter28 |> Map.toSeq, Name = "Probability of hit after 28/103") | |
Chart.Line(priorAfter25and33 |> Map.toSeq, Name = "Probability of hit after 25/100 and then 3/3") | |
Chart.Line((uniformPrior |> Map.toSeq), Name = "Uniform Probability") ] | |
|> Chart.combine | |
|> Chart.withXAxisStyle ("Proportion of Hits") | |
|> Chart.withYAxisStyle ("Probability") | |
|> Chart.show |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment