FrankRuns · March 16, 2022 12:12
diff --git a/is-behavior-change-happening.Rmd b/is-behavior-change-happening.Rmd
 ---
 title: "Is-Behavior-Change-Happening"
 author: "frank-corrigan"
 date: "3/9/2022"
 output: html_document
 ---

 ```{r setup, include=FALSE}
 knitr::opts_chunk$set(echo = TRUE)

 # libraries
 library(tidyverse)
 library(rethinking)
 ```

 ## Single Behavior - Super Optimistic Starting Point

 As a leader, I want to be confident that certain behaviors are being demonstrated across my organizations. I can combine my starting intuition with observational data to understand the uncertainty around the proportion of time these certain behaviors are being practiced.

 ```{r context-slide-too-optimistic}

 # create a grid of estimates for proportion of time behaviors are being used
 p_grid <- seq(from=0, to=1, length.out = 1000)

 # create a prior for each estimate in the grid above
 # in this scenario, the leader is too optimistic saying it's happening 
 # at least 85% of the time.
 prior <- c(rep(0, 850), rep(1, 100), rep(0, 50))

 # create a likelihood for each estimate based on observed data
 # in this case the observed data is that the leader saw the behavior demonstrated
 # at 9/10 meetings.
 likelihood <- dbinom(9, size=10, prob=p_grid)

 # create posterior. this is a combination of your intuition and 
 # observed data points.
 unstd.posterior <- likelihood * prior

 # normalize the posterior so it's a probability
 posterior <- unstd.posterior / sum(unstd.posterior)

 # sample the posterior so we can WHAT????
 samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

 print(paste("The behavior is most likely occuring between,",
            round(HPDI(samples)[1],4)*100, "and",
            round(HPDI(samples)[2],4)*100, "percent of the time."))
 print(paste("And our best guesss is,",
            round(median(samples),4)*100,
            "percent of the time."))

 ```

 ### Visualizing the Optimistic Possibilities

 ```{r context-slide-too-optimistic-plot}

 the_dens <- density(samples)

 the_data <- tibble(x = the_dens$x, y = the_dens$y) %>%
  mutate(variable = case_when(
    x <= 0.9 ~ "Lower",
    x >= 0.9 ~ "Higher",
    TRUE ~ NA_character_
  ))

 ggplot(the_data, aes(x,y)) + geom_line() +
  geom_area(data = filter(the_data, variable == "Lower"), fill = "#d73027") +
  geom_area(data = filter(the_data, variable == "Higher"), fill = "#1a9850") +
  scale_x_continuous(limits = c(0.5,1), labels = scales::percent) +
  geom_vline(xintercept = 0.9) +
  theme_minimal() +
  theme(text = element_text(size = 20),
        plot.title = element_text(face = "bold"),
        plot.subtitle = element_text(size = 12),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(x="Proportion of Time Behaviors are Happening",
       y="Density of Possibilities",
       title="Super Optimistic Starting Assumption",
       subtitle="Using a super optimistic starting assumption, you can call it a victory...")

 ```
 ## Single Behavior - Cautious, Realistic Starting Point

 ```{r context-slide-cautious-realistic}

 # create a grid of estimates for proportion of time behaviors are being used
 p_grid <- seq(from=0, to=1, length.out = 1000)

 # create a prior for each estimate in the grid above
 # in this scenario, the leader is too optimistic saying it's happening 
 # at least 50% of the time.
 prior <- c(rep(0, 500), rep(1, 450), rep(0, 50))
 # prior <- c(rep(0, 675), rep(1, 275), rep(0, 50))

 # create a likelihood for each estimate based on observed data
 # in this case the observed data is that the leader saw the behavior demonstrated
 # at 9/10 meetings.
 likelihood <- dbinom(9, size=10, prob=p_grid)

 # create posterior. this is a combination of your intuition and 
 # observed data points.
 unstd.posterior <- likelihood * prior

 # normalize the posterior so it's a probability
 posterior <- unstd.posterior / sum(unstd.posterior)

 # sample the posterior so we can WHAT????
 samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

 print(paste("The bahavior is most likely occuring between,",
            round(HPDI(samples)[1],4)*100, "and",
            round(HPDI(samples)[2],4)*100, "percent of the time."))
 print(paste("And our best guesss is,",
            round(median(samples),4)*100,
            "percent of the time."))

 ```
 ### Visualizing the Cautious Possibilities

 ```{r context-slide-cautious-realistic-plot}

 the_dens <- density(samples)

 the_data <- tibble(x = the_dens$x, y = the_dens$y) %>%
  mutate(variable = case_when(
    x <= 0.9 ~ "Lower",
    x >= 0.9 ~ "Higher",
    TRUE ~ NA_character_
  ))

 ggplot(the_data, aes(x,y)) + geom_line() +
  geom_area(data = filter(the_data, variable == "Lower"), fill = "#d73027") +
  geom_area(data = filter(the_data, variable == "Higher"), fill = "#1a9850") +
  scale_x_continuous(limits = c(0.5,1), labels = scales::percent) +
  geom_vline(xintercept = 0.9) +
  theme_minimal() +
  theme(text = element_text(size = 20),
        plot.title = element_text(face = "bold"),
        plot.subtitle = element_text(size = 12),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(x="Proportion of Time Behaviors are Happening",
       y="Density of Possibilities",
       title="Cautious & Realistic Starting Assumption",
       subtitle="Using a more cautious & skeptical starting assumption, it's time to listen to your team...")

 ```

 ### Variations

 ```{r variations}

 updating <- function(prior, obs_positive, obs) {
  
  # inputs
  # prior = number between 0-950 representing how confident you
  # are the behaviors are happening at higher rates
  # obs_count = occurrences you observed behavior out of obs
  # obs = number of total observations
  
  p_grid <- seq(from=0, to=1, length.out = 1000)
  prior <- c(rep(0, prior), rep(1, 1000-prior-50), rep(0, 50))
  likelihood <- dbinom(obs_positive, size=obs, prob=p_grid)
  unstd.posterior <- likelihood * prior
  posterior <- unstd.posterior / sum(unstd.posterior)
  samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)
  
  return(median(samples))
  
 }

 # vecs to play with parameters
 priors_vec <- c(500, 675, 850)
 obs_pos_vec <- c(8, 9, 10)
 obs_vec <- c(10, 100, 1000)

 # as your starting assumption is more confident,
 # your updated confidence grows higher
 for (prior in priors_vec) {
  print(updating(prior, 9, 10))
 }

 # as number of positive observations increases,
 # the less uncertainty remains
 for (obs_pos in obs_pos_vec) {
  print(updating(500, obs_pos, 10))
 }

 # as the total number of observations increases,
 # and the rate of positives remains steady,
 # the certainty converges toward the observed data
 for (obs in obs_vec) {
  print(updating(500, obs*0.9, obs))
 }

 ```
	---
	title: "Is-Behavior-Change-Happening"
	author: "frank-corrigan"
	date: "3/9/2022"
	output: html_document
	---

	```{r setup, include=FALSE}
	knitr::opts_chunk$set(echo = TRUE)

	# libraries
	library(tidyverse)
	library(rethinking)
	```

	## Single Behavior - Super Optimistic Starting Point

	As a leader, I want to be confident that certain behaviors are being demonstrated across my organizations. I can combine my starting intuition with observational data to understand the uncertainty around the proportion of time these certain behaviors are being practiced.

	```{r context-slide-too-optimistic}

	# create a grid of estimates for proportion of time behaviors are being used
	p_grid <- seq(from=0, to=1, length.out = 1000)

	# create a prior for each estimate in the grid above
	# in this scenario, the leader is too optimistic saying it's happening
	# at least 85% of the time.
	prior <- c(rep(0, 850), rep(1, 100), rep(0, 50))

	# create a likelihood for each estimate based on observed data
	# in this case the observed data is that the leader saw the behavior demonstrated
	# at 9/10 meetings.
	likelihood <- dbinom(9, size=10, prob=p_grid)

	# create posterior. this is a combination of your intuition and
	# observed data points.
	unstd.posterior <- likelihood * prior

	# normalize the posterior so it's a probability
	posterior <- unstd.posterior / sum(unstd.posterior)

	# sample the posterior so we can WHAT????
	samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

	print(paste("The behavior is most likely occuring between,",
	round(HPDI(samples)[1],4)*100, "and",
	round(HPDI(samples)[2],4)*100, "percent of the time."))
	print(paste("And our best guesss is,",
	round(median(samples),4)*100,
	"percent of the time."))

	```

	### Visualizing the Optimistic Possibilities

	```{r context-slide-too-optimistic-plot}

	the_dens <- density(samples)

	the_data <- tibble(x = the_dens$x, y = the_dens$y) %>%
	mutate(variable = case_when(
	x <= 0.9 ~ "Lower",
	x >= 0.9 ~ "Higher",
	TRUE ~ NA_character_
	))

	ggplot(the_data, aes(x,y)) + geom_line() +
	geom_area(data = filter(the_data, variable == "Lower"), fill = "#d73027") +
	geom_area(data = filter(the_data, variable == "Higher"), fill = "#1a9850") +
	scale_x_continuous(limits = c(0.5,1), labels = scales::percent) +
	geom_vline(xintercept = 0.9) +
	theme_minimal() +
	theme(text = element_text(size = 20),
	plot.title = element_text(face = "bold"),
	plot.subtitle = element_text(size = 12),
	axis.text.y = element_blank(),
	axis.ticks.y = element_blank()) +
	labs(x="Proportion of Time Behaviors are Happening",
	y="Density of Possibilities",
	title="Super Optimistic Starting Assumption",
	subtitle="Using a super optimistic starting assumption, you can call it a victory...")

	```
	## Single Behavior - Cautious, Realistic Starting Point

	```{r context-slide-cautious-realistic}

	# create a grid of estimates for proportion of time behaviors are being used
	p_grid <- seq(from=0, to=1, length.out = 1000)

	# create a prior for each estimate in the grid above
	# in this scenario, the leader is too optimistic saying it's happening
	# at least 50% of the time.
	prior <- c(rep(0, 500), rep(1, 450), rep(0, 50))
	# prior <- c(rep(0, 675), rep(1, 275), rep(0, 50))

	# create a likelihood for each estimate based on observed data
	# in this case the observed data is that the leader saw the behavior demonstrated
	# at 9/10 meetings.
	likelihood <- dbinom(9, size=10, prob=p_grid)

	# create posterior. this is a combination of your intuition and
	# observed data points.
	unstd.posterior <- likelihood * prior

	# normalize the posterior so it's a probability
	posterior <- unstd.posterior / sum(unstd.posterior)

	# sample the posterior so we can WHAT????
	samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

	print(paste("The bahavior is most likely occuring between,",
	round(HPDI(samples)[1],4)*100, "and",
	round(HPDI(samples)[2],4)*100, "percent of the time."))
	print(paste("And our best guesss is,",
	round(median(samples),4)*100,
	"percent of the time."))

	```
	### Visualizing the Cautious Possibilities

	```{r context-slide-cautious-realistic-plot}

	the_dens <- density(samples)

	the_data <- tibble(x = the_dens$x, y = the_dens$y) %>%
	mutate(variable = case_when(
	x <= 0.9 ~ "Lower",
	x >= 0.9 ~ "Higher",
	TRUE ~ NA_character_
	))

	ggplot(the_data, aes(x,y)) + geom_line() +
	geom_area(data = filter(the_data, variable == "Lower"), fill = "#d73027") +
	geom_area(data = filter(the_data, variable == "Higher"), fill = "#1a9850") +
	scale_x_continuous(limits = c(0.5,1), labels = scales::percent) +
	geom_vline(xintercept = 0.9) +
	theme_minimal() +
	theme(text = element_text(size = 20),
	plot.title = element_text(face = "bold"),
	plot.subtitle = element_text(size = 12),
	axis.text.y = element_blank(),
	axis.ticks.y = element_blank()) +
	labs(x="Proportion of Time Behaviors are Happening",
	y="Density of Possibilities",
	title="Cautious & Realistic Starting Assumption",
	subtitle="Using a more cautious & skeptical starting assumption, it's time to listen to your team...")

	```

	### Variations

	```{r variations}

	updating <- function(prior, obs_positive, obs) {

	# inputs
	# prior = number between 0-950 representing how confident you
	# are the behaviors are happening at higher rates
	# obs_count = occurrences you observed behavior out of obs
	# obs = number of total observations

	p_grid <- seq(from=0, to=1, length.out = 1000)
	prior <- c(rep(0, prior), rep(1, 1000-prior-50), rep(0, 50))
	likelihood <- dbinom(obs_positive, size=obs, prob=p_grid)
	unstd.posterior <- likelihood * prior
	posterior <- unstd.posterior / sum(unstd.posterior)
	samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)

	return(median(samples))

	}

	# vecs to play with parameters
	priors_vec <- c(500, 675, 850)
	obs_pos_vec <- c(8, 9, 10)
	obs_vec <- c(10, 100, 1000)

	# as your starting assumption is more confident,
	# your updated confidence grows higher
	for (prior in priors_vec) {
	print(updating(prior, 9, 10))
	}

	# as number of positive observations increases,
	# the less uncertainty remains
	for (obs_pos in obs_pos_vec) {
	print(updating(500, obs_pos, 10))
	}

	# as the total number of observations increases,
	# and the rate of positives remains steady,
	# the certainty converges toward the observed data
	for (obs in obs_vec) {
	print(updating(500, obs*0.9, obs))
	}

	```