FrankRuns · January 23, 2024 11:20
diff --git a/white-noise.R b/white-noise.R

 # Replicate Bob's results from this LinkedIn post:
 # https://www.linkedin.com/posts/bob-wilson-77a22ab_people-sometimes-say-ab-testing-requires-activity-7152792859878871040-X1Sr?utm_source=share&utm_medium=member_desktop


 ### Implement Fisher's Exact Test

 # 2x2 table of outcomes. matrix() fills column by column, so the layout is:
 #             Successes Failures
 #   Control           0        7
 #   Treatment         4        3
 contingency_table <- matrix(c(0, 4, 7, 3), nrow = 2)
 dimnames(contingency_table) <- list(c("Control", "Treatment"),
                                     c("Successes", "Failures"))

 # Fisher's Exact Test checks for a nonrandom association between group and
 # outcome. For a 2x2 table, fisher.test() expresses the one-sided alternative
 # through the odds ratio of the [1, 1] cell, which is Control successes here.
 # "Treatment succeeds more often than Control" therefore corresponds to an
 # odds ratio below 1, i.e. alternative = "less". With "greater" the test asks
 # whether Control beats Treatment and, with 0 Control successes, returns 1.
 fisher_test_result <- fisher.test(contingency_table, alternative = "less")
 p_value <- fisher_test_result$p.value

 # Keep a dedicated copy, since `p_value` is reassigned later in the script.
 fisher_p_value <- p_value


 ### Implement Bob's hypergeometric distribution approach

 # Model: N of the M trials form one group; the question is how likely it is
 # that at least x of the n successes in the population land in that group.
 # NOTE(review): n and x are 3 here, while the contingency table and the
 # Bayesian sections of this script use 4 treatment successes -- confirm which
 # figure matches the post being replicated.
 M <- 14 # population size: all trials, both groups
 n <- 3  # successes in the whole population
 N <- 7  # sample size: trials in a single group

 # Observed successes in the treatment group
 x <- 3

 # One-sided p-value P(X >= x). phyper() is evaluated at x - 1 because
 # lower.tail = FALSE yields the strict upper tail P(X > q).
 p_value <- phyper(
   q = x - 1,  # largest count excluded from the tail
   m = n,      # successes available in the population
   n = M - n,  # failures available in the population
   k = N,      # number of trials drawn
   lower.tail = FALSE
 )


 ### Bayesian approach inspecting treatment data

 # Beta-binomial update: a Beta(prior_a, prior_b) belief about the success
 # probability becomes Beta(prior_a + successes, prior_b + failures) once the
 # treatment outcomes have been observed.

 # Prior (2:1) and observed treatment data
 prior_a <- 2
 prior_b <- 1
 successes <- 4
 trials <- 7

 # Posterior shape parameters (failures = trials - successes)
 posterior_a <- prior_a + successes
 posterior_b <- prior_b + (trials - successes)

 # Evaluate the posterior density on an even grid of 100 points over [0, 1]
 probabilities <- seq(0, 1, length.out = 100)
 posterior <- dbeta(probabilities, posterior_a, posterior_b)

 # Posterior in blue, prior overlaid in red for comparison
 plot(
   x = probabilities, y = posterior,
   type = "l", col = "blue",
   xlab = "Probability of Effectiveness",
   ylab = "Density",
   main = "Posterior Distribution of White Noise Machine Effectiveness"
 )
 lines(probabilities, dbeta(probabilities, prior_a, prior_b), col = "red")
 legend("topright",
        legend = c("Posterior", "Prior"),
        col = c("blue", "red"),
        lty = 1)

 # Probability mass on each side of 0.5 under the prior ...
 area_left_of_0_5 <- pbeta(0.5, prior_a, prior_b)
 area_right_of_0_5 <- 1 - area_left_of_0_5

 # ... and under the posterior
 posterior_area_left_of_0_5 <- pbeta(0.5, posterior_a, posterior_b)
 posterior_area_right_of_0_5 <- 1 - posterior_area_left_of_0_5

 # Report the prior split first, then the posterior split
 cat("Area to the left of 0.5: ", area_left_of_0_5, "\n")
 cat("Area to the right of 0.5: ", area_right_of_0_5, "\n")
 cat("Posterior distribution - Area to the left of 0.5: ", posterior_area_left_of_0_5, "\n")
 cat("Posterior distribution - Area to the right of 0.5: ", posterior_area_right_of_0_5, "\n")


 ### Bayesian approach compare treatment and control (with prior of 2:1)

 # Both groups start from the same Beta(prior_a, prior_b) prior and are
 # updated independently with their own success / failure counts.
 prior_a <- 2
 prior_b <- 1

 # Observed outcomes per group
 treatment_successes <- 4
 treatment_trials <- 7
 control_successes <- 0
 control_trials <- 7

 # Posterior shape parameters: treatment
 posterior_a_treatment <- prior_a + treatment_successes
 posterior_b_treatment <- prior_b + (treatment_trials - treatment_successes)

 # Posterior shape parameters: control
 posterior_a_control <- prior_a + control_successes
 posterior_b_control <- prior_b + (control_trials - control_successes)

 # Evaluate both posterior densities on the same 100-point grid over [0, 1]
 probabilities <- seq(0, 1, length.out = 100)
 posterior_treatment <- dbeta(probabilities,
                              posterior_a_treatment,
                              posterior_b_treatment)
 posterior_control <- dbeta(probabilities,
                            posterior_a_control,
                            posterior_b_control)

 # Tallest point across both curves, used to size the y axis so that
 # neither curve is clipped
 max_density_treatment <- max(posterior_treatment)
 max_density_control <- max(posterior_control)
 max_density <- max(c(max_density_treatment, max_density_control))

 # Treatment in blue; y axis runs from 0 to the tallest peak plus 10% headroom
 plot(
   x = probabilities, y = posterior_treatment,
   type = "l", col = "blue",
   xlab = "Probability of Effectiveness",
   ylab = "Density",
   main = "Posterior Distributions",
   ylim = c(0, max_density * 1.1)
 )

 # Control in red on the same axes
 lines(probabilities, posterior_control, col = "red")
 legend("topright",
        legend = c("Treatment", "Control"),
        col = c("blue", "red"),
        lty = 1)

	# Replicate Bob's results from this LinkedIn post:
	# https://www.linkedin.com/posts/bob-wilson-77a22ab_people-sometimes-say-ab-testing-requires-activity-7152792859878871040-X1Sr?utm_source=share&utm_medium=member_desktop


	### Implement Fisher's Exact Test

	# 2x2 table of outcomes. matrix() fills column by column, so the layout is:
	#             Successes Failures
	#   Control           0        7
	#   Treatment         4        3
	contingency_table <- matrix(c(0, 4, 7, 3), nrow = 2)
	dimnames(contingency_table) <- list(c("Control", "Treatment"),
	  c("Successes", "Failures"))

	# Fisher's Exact Test checks for a nonrandom association between group and
	# outcome. For a 2x2 table, fisher.test() expresses the one-sided alternative
	# through the odds ratio of the [1, 1] cell, which is Control successes here.
	# "Treatment succeeds more often than Control" therefore corresponds to an
	# odds ratio below 1, i.e. alternative = "less". With "greater" the test asks
	# whether Control beats Treatment and, with 0 Control successes, returns 1.
	fisher_test_result <- fisher.test(contingency_table, alternative = "less")
	p_value <- fisher_test_result$p.value

	# Keep a dedicated copy, since `p_value` is reassigned later in the script.
	fisher_p_value <- p_value


	### Implement Bob's hypergeometric distribution approach

	# Model: N of the M trials form one group; the question is how likely it is
	# that at least x of the n successes in the population land in that group.
	# NOTE(review): n and x are 3 here, while the contingency table and the
	# Bayesian sections of this script use 4 treatment successes -- confirm which
	# figure matches the post being replicated.
	M <- 14 # population size: all trials, both groups
	n <- 3 # successes in the whole population
	N <- 7 # sample size: trials in a single group

	# Observed successes in the treatment group
	x <- 3

	# One-sided p-value P(X >= x). phyper() is evaluated at x - 1 because
	# lower.tail = FALSE yields the strict upper tail P(X > q).
	p_value <- phyper(
	  q = x - 1, # largest count excluded from the tail
	  m = n, # successes available in the population
	  n = M - n, # failures available in the population
	  k = N, # number of trials drawn
	  lower.tail = FALSE
	)


	### Bayesian approach inspecting treatment data

	# Beta-binomial update: a Beta(prior_a, prior_b) belief about the success
	# probability becomes Beta(prior_a + successes, prior_b + failures) once the
	# treatment outcomes have been observed.

	# Prior (2:1) and observed treatment data
	prior_a <- 2
	prior_b <- 1
	successes <- 4
	trials <- 7

	# Posterior shape parameters (failures = trials - successes)
	posterior_a <- prior_a + successes
	posterior_b <- prior_b + (trials - successes)

	# Evaluate the posterior density on an even grid of 100 points over [0, 1]
	probabilities <- seq(0, 1, length.out = 100)
	posterior <- dbeta(probabilities, posterior_a, posterior_b)

	# Posterior in blue, prior overlaid in red for comparison
	plot(
	  x = probabilities, y = posterior,
	  type = "l", col = "blue",
	  xlab = "Probability of Effectiveness",
	  ylab = "Density",
	  main = "Posterior Distribution of White Noise Machine Effectiveness"
	)
	lines(probabilities, dbeta(probabilities, prior_a, prior_b), col = "red")
	legend("topright",
	  legend = c("Posterior", "Prior"),
	  col = c("blue", "red"),
	  lty = 1)

	# Probability mass on each side of 0.5 under the prior ...
	area_left_of_0_5 <- pbeta(0.5, prior_a, prior_b)
	area_right_of_0_5 <- 1 - area_left_of_0_5

	# ... and under the posterior
	posterior_area_left_of_0_5 <- pbeta(0.5, posterior_a, posterior_b)
	posterior_area_right_of_0_5 <- 1 - posterior_area_left_of_0_5

	# Report the prior split first, then the posterior split
	cat("Area to the left of 0.5: ", area_left_of_0_5, "\n")
	cat("Area to the right of 0.5: ", area_right_of_0_5, "\n")
	cat("Posterior distribution - Area to the left of 0.5: ", posterior_area_left_of_0_5, "\n")
	cat("Posterior distribution - Area to the right of 0.5: ", posterior_area_right_of_0_5, "\n")


	### Bayesian approach compare treatment and control (with prior of 2:1)

	# Both groups start from the same Beta(prior_a, prior_b) prior and are
	# updated independently with their own success / failure counts.
	prior_a <- 2
	prior_b <- 1

	# Observed outcomes per group
	treatment_successes <- 4
	treatment_trials <- 7
	control_successes <- 0
	control_trials <- 7

	# Posterior shape parameters: treatment
	posterior_a_treatment <- prior_a + treatment_successes
	posterior_b_treatment <- prior_b + (treatment_trials - treatment_successes)

	# Posterior shape parameters: control
	posterior_a_control <- prior_a + control_successes
	posterior_b_control <- prior_b + (control_trials - control_successes)

	# Evaluate both posterior densities on the same 100-point grid over [0, 1]
	probabilities <- seq(0, 1, length.out = 100)
	posterior_treatment <- dbeta(probabilities,
	  posterior_a_treatment,
	  posterior_b_treatment)
	posterior_control <- dbeta(probabilities,
	  posterior_a_control,
	  posterior_b_control)

	# Tallest point across both curves, used to size the y axis so that
	# neither curve is clipped
	max_density_treatment <- max(posterior_treatment)
	max_density_control <- max(posterior_control)
	max_density <- max(c(max_density_treatment, max_density_control))

	# Treatment in blue; y axis runs from 0 to the tallest peak plus 10% headroom
	plot(
	  x = probabilities, y = posterior_treatment,
	  type = "l", col = "blue",
	  xlab = "Probability of Effectiveness",
	  ylab = "Density",
	  main = "Posterior Distributions",
	  ylim = c(0, max_density * 1.1)
	)

	# Control in red on the same axes
	lines(probabilities, posterior_control, col = "red")
	legend("topright",
	  legend = c("Treatment", "Control"),
	  col = c("blue", "red"),
	  lty = 1)