Soares · January 17, 2022 14:32 · 3dots · Oct 10, 2018 · seisvelas · Aug 19, 2019
diff --git a/coin_bias_test.py b/coin_bias_test.py
 # Let's say you have a fair coin.
 # Let's say you don't know whether or not it's actually biased towards heads by a certain degree.
 # Let's say you can flip it at most a certain number of times.
 # What's the probability that, at some point along the way, it looks pretty likely to be biased?
 from random import random
 from scipy.misc import comb

 # The actual bias of the coin
 BIAS = 0.5
 # The number of times to flip the coin
 FLIPS = 300
 # All hypotheses will be compared only to the final hypothesis
 HYPOTHESES = (0.75, 0.55, 0.5)
 # We'll check how often the likelihood ratio goes above these values
 THRESHOLDS = (20, 100)
 # Ignore extreme odds that occur before at least this much data has been collected:
 LARGE_NUMBER = 0
 # We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
 RUNS = 10000

 def gen_sequence():
    for i in range(FLIPS):
        yield 'H' if random() < BIAS else 'T'

 def odds_sequence(sequence):
    n_heads = 0
    n_flips = 0
    for flip in sequence:
        n_flips += 1
        if flip == 'H':
            n_heads += 1
        yield tuple(b**n_heads * (1-b)**(n_flips - n_heads) for b in HYPOTHESES)

 def relative_odds(odds, i, j):
    return odds[i] / odds[j] if odds[j] > 0 else float('inf')

 def most_extreme_odds(i, odds_list):
    assert odds_list
    rel_odds = [relative_odds(o, i, -1) for o in odds_list]
    indexed_odds = list(enumerate(rel_odds))[LARGE_NUMBER:]
    
    max_i, max_odds = max(indexed_odds, key=lambda io: io[1])
    min_i, min_odds = min(indexed_odds, key=lambda io: io[1])
    return (max_odds, max_i + 1, min_odds, min_i + 1, rel_odds[-1], len(rel_odds))

 # Generates a report for a single run of the data.
 # (Not used by default, but you can use it yourself to inspect a single run.)
 def report(odds_list):
    for i in range(len(HYPOTHESES) - 1):
        max_odds, max_n, min_odds, min_n, final_odds, final_n = most_extreme_odds(i, odds_list)
        print('Odds that the coin was {:2.0f}% biased towards heads (as opposed to {:2.0f}%)'.format(
            100.0 * HYPOTHESES[i],
            100.0 * HYPOTHESES[-1]))
        print('\tMaximum: ({} : 1) after {} flips.'.format(max_odds, max_n))
        print('\tMinimum: ({} : 1) after {} flips.'.format(min_odds, min_n))
        print('\t  Final: ({} : 1) after {} flips.'.format(final_odds, final_n))

 def report_threshold_breaches(i, odds_lists):
    assert odds_lists
    extremities = (most_extreme_odds(i, odds_list) for odds_list in odds_lists)
    middle_counters = {t: 0 for t in THRESHOLDS}
    middle_trackers = {t: [] for t in THRESHOLDS}
    final_counters = {t: 0 for t in THRESHOLDS}
    for max_odds, max_n, _, _, final_odds, _ in extremities:
        for threshold in THRESHOLDS:
            if max_odds >= threshold:
                middle_counters[threshold] += 1
                middle_trackers[threshold].append(max_n)
            if final_odds >= threshold:
                final_counters[threshold] += 1
    for threshold in THRESHOLDS:
        print('Odds for {:2.0f}% bias over {:2.0f}% went above ({} : 1) {} times, which was ~{:0.2f}% of the time'.format(
            100.0 * HYPOTHESES[i],
            100.0 * HYPOTHESES[-1],
            threshold,
            middle_counters[threshold],
            (100.0 * middle_counters[threshold]) / len(odds_lists)))
        print('\tand ended above ({} : 1) odds {} ({:0.0f}%) times.'.format(
            threshold,
            final_counters[threshold],
            (100.0 * final_counters[threshold]) / len(odds_lists)))
        print('\tthreshold max happened at around toss number {:0.2f}, on average.'.format(
            sum(middle_trackers[threshold]) / len(middle_trackers[threshold])))

 def run():
    odds_lists = [list(odds_sequence(gen_sequence())) for _ in range(RUNS)]
    for i in range(len(HYPOTHESES) - 1):
        report_threshold_breaches(i, odds_lists)
        print('')

 run()
	# Let's say you have a fair coin.
	# Let's say you don't know whether or not it's actually biased towards heads by a certain degree.
	# Let's say you can flip it at most a certain number of times.
	# What's the probability that, at some point along the way, it looks pretty likely to be biased?
	from random import random
	from scipy.misc import comb

	# The actual bias of the coin
	BIAS = 0.5
	# The number of times to flip the coin
	FLIPS = 300
	# All hypotheses will be compared only to the final hypothesis
	HYPOTHESES = (0.75, 0.55, 0.5)
	# We'll check how often the likelihood ratio goes above these values
	THRESHOLDS = (20, 100)
	# Ignore extreme odds that occur before at least this much data has been collected:
	LARGE_NUMBER = 0
	# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
	RUNS = 10000

	def gen_sequence():
	for i in range(FLIPS):
	yield 'H' if random() < BIAS else 'T'

	def odds_sequence(sequence):
	n_heads = 0
	n_flips = 0
	for flip in sequence:
	n_flips += 1
	if flip == 'H':
	n_heads += 1
	yield tuple(b*n_heads (1-b)**(n_flips - n_heads) for b in HYPOTHESES)

	def relative_odds(odds, i, j):
	return odds[i] / odds[j] if odds[j] > 0 else float('inf')

	def most_extreme_odds(i, odds_list):
	assert odds_list
	rel_odds = [relative_odds(o, i, -1) for o in odds_list]
	indexed_odds = list(enumerate(rel_odds))[LARGE_NUMBER:]

	max_i, max_odds = max(indexed_odds, key=lambda io: io[1])
	min_i, min_odds = min(indexed_odds, key=lambda io: io[1])
	return (max_odds, max_i + 1, min_odds, min_i + 1, rel_odds[-1], len(rel_odds))

	# Generates a report for a single run of the data.
	# (Not used by default, but you can use it yourself to inspect a single run.)
	def report(odds_list):
	for i in range(len(HYPOTHESES) - 1):
	max_odds, max_n, min_odds, min_n, final_odds, final_n = most_extreme_odds(i, odds_list)
	print('Odds that the coin was {:2.0f}% biased towards heads (as opposed to {:2.0f}%)'.format(
	100.0 * HYPOTHESES[i],
	100.0 * HYPOTHESES[-1]))
	print('\tMaximum: ({} : 1) after {} flips.'.format(max_odds, max_n))
	print('\tMinimum: ({} : 1) after {} flips.'.format(min_odds, min_n))
	print('\t Final: ({} : 1) after {} flips.'.format(final_odds, final_n))

	def report_threshold_breaches(i, odds_lists):
	assert odds_lists
	extremities = (most_extreme_odds(i, odds_list) for odds_list in odds_lists)
	middle_counters = {t: 0 for t in THRESHOLDS}
	middle_trackers = {t: [] for t in THRESHOLDS}
	final_counters = {t: 0 for t in THRESHOLDS}
	for max_odds, max_n, _, _, final_odds, _ in extremities:
	for threshold in THRESHOLDS:
	if max_odds >= threshold:
	middle_counters[threshold] += 1
	middle_trackers[threshold].append(max_n)
	if final_odds >= threshold:
	final_counters[threshold] += 1
	for threshold in THRESHOLDS:
	print('Odds for {:2.0f}% bias over {:2.0f}% went above ({} : 1) {} times, which was ~{:0.2f}% of the time'.format(
	100.0 * HYPOTHESES[i],
	100.0 * HYPOTHESES[-1],
	threshold,
	middle_counters[threshold],
	(100.0 * middle_counters[threshold]) / len(odds_lists)))
	print('\tand ended above ({} : 1) odds {} ({:0.0f}%) times.'.format(
	threshold,
	final_counters[threshold],
	(100.0 * final_counters[threshold]) / len(odds_lists)))
	print('\tthreshold max happened at around toss number {:0.2f}, on average.'.format(
	sum(middle_trackers[threshold]) / len(middle_trackers[threshold])))

	def run():
	odds_lists = [list(odds_sequence(gen_sequence())) for _ in range(RUNS)]
	for i in range(len(HYPOTHESES) - 1):
	report_threshold_breaches(i, odds_lists)
	print('')

	run()