Created
January 23, 2024 11:20
-
-
Save FrankRuns/b8413d6194af859a05640ccc07d96dfa to your computer and use it in GitHub Desktop.
Calculations supporting substack post called Analytics, now what?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replicate Bob's results from this LinkedIn post: | |
# https://www.linkedin.com/posts/bob-wilson-77a22ab_people-sometimes-say-ab-testing-requires-activity-7152792859878871040-X1Sr?utm_source=share&utm_medium=member_desktop | |
### Implement Fisher's Exact Test | |
# Create the contingency table | |
contingency_table <- matrix(c(0, 4, 7, 3), nrow = 2) | |
dimnames(contingency_table) <- list(c("Control", "Treatment"), | |
c("Successes", "Failures")) | |
# Fisher's Exact Test is used here to determine if there are nonrandom | |
# associations between two categorical variables in our contingency table. | |
fisher_test_result <- fisher.test(contingency_table, alternative = "greater") | |
p_value <- fisher_test_result$p.value | |
### Implement Bob's hypergeometric distribution approach | |
# The hypergeometric distribution helps in understanding the likelihood of a | |
# specific number of successes in a sample, given the total number of successes | |
# and failures in the population. | |
# Parameters for the hypergeometric distribution | |
M <- 14 # Total number of objects (total trials) | |
n <- 3 # Total number of Type I objects (total successes) | |
N <- 7 # Total number of objects drawn (trials in one group) | |
# The number of successes in the treatment group | |
x <- 3 | |
# Calculate the one-sided p-value using the hypergeometric distribution | |
p_value <- phyper(x - 1, n, M - n, N, lower.tail = FALSE) | |
### Bayesian approach inspecting treatment data | |
# In the Bayesian approach, we start with prior beliefs about the probability | |
# of success (here, a 2:1 prior), and then update these beliefs based on the | |
# observed data (successes and trials). | |
# Define the parameters (with 2:1 prior) | |
prior_a <- 2 | |
prior_b <- 1 | |
successes <- 4 | |
trials <- 7 | |
# Calculate the posterior parameters | |
posterior_a <- prior_a + successes | |
posterior_b <- prior_b + trials - successes | |
# Create a sequence of probabilities for plotting the posterior | |
probabilities <- seq(0, 1, length.out = 100) | |
# Compute the posterior distribution | |
posterior <- dbeta(probabilities, posterior_a, posterior_b) | |
# Plot the posterior distribution | |
plot(probabilities, posterior, type = 'l', col = 'blue', | |
xlab = 'Probability of Effectiveness', ylab = 'Density', | |
main = 'Posterior Distribution of White Noise Machine Effectiveness') | |
# Add a line for the prior distribution for comparison | |
lines(probabilities, dbeta(probabilities, prior_a, prior_b), col = 'red') | |
legend('topright', legend = c('Posterior', 'Prior'), col = c('blue', 'red'), lty = 1) | |
# How much area under my prior curve? | |
# Calculating the area to the left of 0.5 | |
area_left_of_0_5 <- pbeta(0.5, prior_a, prior_b) | |
# Calculating the area to the right of 0.5 | |
area_right_of_0_5 <- 1 - area_left_of_0_5 | |
# Printing the results | |
cat("Area to the left of 0.5: ", area_left_of_0_5, "\n") | |
cat("Area to the right of 0.5: ", area_right_of_0_5, "\n") | |
# How much area under my posterior curve? | |
# Your existing code for posterior parameters | |
posterior_a <- prior_a + successes | |
posterior_b <- prior_b + trials - successes | |
# Calculating the area to the left of 0.5 for the posterior distribution | |
posterior_area_left_of_0_5 <- pbeta(0.5, posterior_a, posterior_b) | |
# Calculating the area to the right of 0.5 for the posterior distribution | |
posterior_area_right_of_0_5 <- 1 - posterior_area_left_of_0_5 | |
# Printing the results | |
cat("Posterior distribution - Area to the left of 0.5: ", posterior_area_left_of_0_5, "\n") | |
cat("Posterior distribution - Area to the right of 0.5: ", posterior_area_right_of_0_5, "\n") | |
### Bayesian approach compare treatment and control (with prior of 2:1) | |
# Prior parameters | |
prior_a <- 2 | |
prior_b <- 1 | |
# Data | |
treatment_successes <- 4 | |
treatment_trials <- 7 | |
control_successes <- 0 | |
control_trials <- 7 | |
# Posterior parameters for treatment and control | |
posterior_a_treatment <- prior_a + treatment_successes | |
posterior_b_treatment <- prior_b + treatment_trials - treatment_successes | |
posterior_a_control <- prior_a + control_successes | |
posterior_b_control <- prior_b + control_trials - control_successes | |
# Probability sequence for plotting | |
probabilities <- seq(0, 1, length.out = 100) | |
# Posterior distributions | |
posterior_treatment <- dbeta(probabilities, posterior_a_treatment, posterior_b_treatment) | |
posterior_control <- dbeta(probabilities, posterior_a_control, posterior_b_control) | |
# Find the maximum density values for both distributions | |
max_density_treatment <- max(posterior_treatment) | |
max_density_control <- max(posterior_control) | |
max_density <- max(max_density_treatment, max_density_control) | |
# Adjust ylim to include the maximum density value | |
plot(probabilities, posterior_treatment, type = 'l', col = 'blue', | |
xlab = 'Probability of Effectiveness', ylab = 'Density', | |
main = 'Posterior Distributions', | |
ylim = c(0, max_density * 1.1)) # Adding a 10% buffer to ensure visibility | |
lines(probabilities, posterior_control, col = 'red') | |
legend('topright', legend = c('Treatment', 'Control'), col = c('blue', 'red'), lty = 1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment