Last active
January 17, 2022 14:32
-
-
Save Soares/941bdb13233fd0838f1882d148c9ac14 to your computer and use it in GitHub Desktop.
Let's say you have a fair coin, and can flip it at most 300 times. What percentage of the time will tossing the coin generate a sequence that, at some point along the way, supports the hypothesis "the coin is biased 75% towards heads" 20x as much as it supports the hypothesis "the coin is fair"?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's say you have a fair coin. | |
# Let's say you don't know whether or not it's actually biased towards heads by a certain degree. | |
# Let's say you can flip it at most a certain number of times. | |
# What's the probability that, at some point along the way, it looks pretty likely to be biased? | |
from random import random | |
from scipy.misc import comb | |
# The actual bias of the coin | |
BIAS = 0.5 | |
# The number of times to flip the coin | |
FLIPS = 300 | |
# All hypotheses will be compared only to the final hypothesis | |
HYPOTHESES = (0.75, 0.55, 0.5) | |
# We'll check how often the likelihood ratio goes above these values | |
THRESHOLDS = (20, 100) | |
# Ignore extreme odds that occur before at least this much data has been collected: | |
LARGE_NUMBER = 0 | |
# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds. | |
RUNS = 10000 | |
def gen_sequence(): | |
for i in range(FLIPS): | |
yield 'H' if random() < BIAS else 'T' | |
def odds_sequence(sequence): | |
n_heads = 0 | |
n_flips = 0 | |
for flip in sequence: | |
n_flips += 1 | |
if flip == 'H': | |
n_heads += 1 | |
yield tuple(b**n_heads * (1-b)**(n_flips - n_heads) for b in HYPOTHESES) | |
def relative_odds(odds, i, j): | |
return odds[i] / odds[j] if odds[j] > 0 else float('inf') | |
def most_extreme_odds(i, odds_list): | |
assert odds_list | |
rel_odds = [relative_odds(o, i, -1) for o in odds_list] | |
indexed_odds = list(enumerate(rel_odds))[LARGE_NUMBER:] | |
max_i, max_odds = max(indexed_odds, key=lambda io: io[1]) | |
min_i, min_odds = min(indexed_odds, key=lambda io: io[1]) | |
return (max_odds, max_i + 1, min_odds, min_i + 1, rel_odds[-1], len(rel_odds)) | |
# Generates a report for a single run of the data. | |
# (Not used by default, but you can use it yourself to inspect a single run.) | |
def report(odds_list): | |
for i in range(len(HYPOTHESES) - 1): | |
max_odds, max_n, min_odds, min_n, final_odds, final_n = most_extreme_odds(i, odds_list) | |
print('Odds that the coin was {:2.0f}% biased towards heads (as opposed to {:2.0f}%)'.format( | |
100.0 * HYPOTHESES[i], | |
100.0 * HYPOTHESES[-1])) | |
print('\tMaximum: ({} : 1) after {} flips.'.format(max_odds, max_n)) | |
print('\tMinimum: ({} : 1) after {} flips.'.format(min_odds, min_n)) | |
print('\t Final: ({} : 1) after {} flips.'.format(final_odds, final_n)) | |
def report_threshold_breaches(i, odds_lists): | |
assert odds_lists | |
extremities = (most_extreme_odds(i, odds_list) for odds_list in odds_lists) | |
middle_counters = {t: 0 for t in THRESHOLDS} | |
middle_trackers = {t: [] for t in THRESHOLDS} | |
final_counters = {t: 0 for t in THRESHOLDS} | |
for max_odds, max_n, _, _, final_odds, _ in extremities: | |
for threshold in THRESHOLDS: | |
if max_odds >= threshold: | |
middle_counters[threshold] += 1 | |
middle_trackers[threshold].append(max_n) | |
if final_odds >= threshold: | |
final_counters[threshold] += 1 | |
for threshold in THRESHOLDS: | |
print('Odds for {:2.0f}% bias over {:2.0f}% went above ({} : 1) {} times, which was ~{:0.2f}% of the time'.format( | |
100.0 * HYPOTHESES[i], | |
100.0 * HYPOTHESES[-1], | |
threshold, | |
middle_counters[threshold], | |
(100.0 * middle_counters[threshold]) / len(odds_lists))) | |
print('\tand ended above ({} : 1) odds {} ({:0.0f}%) times.'.format( | |
threshold, | |
final_counters[threshold], | |
(100.0 * final_counters[threshold]) / len(odds_lists))) | |
print('\tthreshold max happened at around toss number {:0.2f}, on average.'.format( | |
sum(middle_trackers[threshold]) / len(middle_trackers[threshold]))) | |
def run(): | |
odds_lists = [list(odds_sequence(gen_sequence())) for _ in range(RUNS)] | |
for i in range(len(HYPOTHESES) - 1): | |
report_threshold_breaches(i, odds_lists) | |
print('') | |
run() |
LARGE_NUMBER = 0
This glorious little piece of irony made me chuckle :)
you might want to update:
- from scipy.misc import comb
+ from scipy.special import comb
https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.comb.html
(or state the version of scipy in which the script is supposed to work :))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
odds_sequence(sequence): is actually Probability Sequence based on the current sequence of flips. i.e. how likely it is that this exact sequence of flips would occur given this supposed hypothesized bias.
Reading about odds in https://arbital.com/p/bayes_extraordinary_claims/?pathId=41159, odds are usually a ratio, like 3 : 2.
Your thing is returning 3/(2+3) = 0.6 kind of thing, not the 'odds' per se.