Benjamin Soltoff
January 14, 2016
# load packages
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: tidyr
## Loading required package: readr
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:tidyr':
##
## extract
## Loading required package: ggplot2
# read in cleaned play data
plays <- read_csv("data/pbp_cleaned.csv")
plays
## Source: local data frame [525,393 x 54]
##
## gid pid off def type qtr min sec kne ptso ptsd timo
## (int) (int) (chr) (chr) (chr) (int) (int) (int) (chr) (int) (int) (int)
## 1 260 42250 CHI BAL RUSH 1 14 52 NA 0 0 3
## 2 260 42251 CHI BAL PASS 1 14 31 NA 0 0 3
## 3 260 42252 CHI BAL RUSH 1 13 55 NA 0 0 3
## 4 260 42253 CHI BAL PASS 1 13 21 NA 0 0 3
## 5 260 42254 CHI BAL RUSH 1 12 54 NA 0 0 3
## 6 260 42255 CHI BAL RUSH 1 11 51 NA 0 0 3
## 7 260 42256 CHI BAL PASS 1 11 22 NA 0 0 3
## 8 260 42257 CHI BAL PASS 1 10 32 NA 0 0 3
## 9 260 42258 CHI BAL PASS 1 9 51 NA 0 0 3
## 10 260 42265 BAL CHI PASS 1 6 18 NA 0 3 3
## .. ... ... ... ... ... ... ... ... ... ... ... ...
## Variables not shown: timd (int), dwn (int), ytg (int), yfog (int), yds
## (int), fd (chr), fgxp (chr), pnet (int), pts (int), detail (chr), seas
## (int), wk (int), day (chr), v (chr), h (chr), stad (chr), temp (int),
## humd (int), wspd (int), wdir (chr), cond (chr), surf (chr), ou (dbl),
## sprv (dbl), ptsv (int), ptsh (int), winner (chr), spread (dbl),
## first_down (int), win (int), score_diff (int), secs_left (int), goforit
## (int), fkicker (chr), good (int), kneel_down (int), fdr.x (dbl), N.x
## (int), dwn.y (int), fdr.y (dbl), N.y (int), fdr (dbl)
# filter to specific game situation
plays %<>%
filter(dwn == 1,
qtr == 3,
yfog >= 40,
yfog <= 60,
score_diff >= 0,
score_diff <= 7)
plays
## Source: local data frame [3,560 x 54]
##
## gid pid off def type qtr min sec kne ptso ptsd timo
## (int) (int) (chr) (chr) (chr) (int) (int) (int) (chr) (int) (int) (int)
## 1 260 42331 BAL CHI RUSH 3 14 14 NA 3 3 3
## 2 260 42333 CHI BAL RUSH 3 13 18 NA 3 3 3
## 3 260 42345 CHI BAL PASS 3 8 15 NA 6 3 3
## 4 262 42640 CIN NE RUSH 3 10 24 NA 13 10 2
## 5 262 42642 CIN NE PASS 3 8 59 NA 13 10 2
## 6 262 42658 CIN NE RUSH 3 1 47 NA 16 10 2
## 7 263 42821 SEA CLE RUSH 3 1 27 NA 6 3 3
## 8 264 42936 DAL TB RUSH 3 13 48 NA 3 3 3
## 9 268 43578 CAR MIN PASS 3 8 24 NA 10 7 3
## 10 268 43602 CAR MIN RUSH 3 0 43 NA 17 13 3
## .. ... ... ... ... ... ... ... ... ... ... ... ...
## Variables not shown: timd (int), dwn (int), ytg (int), yfog (int), yds
## (int), fd (chr), fgxp (chr), pnet (int), pts (int), detail (chr), seas
## (int), wk (int), day (chr), v (chr), h (chr), stad (chr), temp (int),
## humd (int), wspd (int), wdir (chr), cond (chr), surf (chr), ou (dbl),
## sprv (dbl), ptsv (int), ptsh (int), winner (chr), spread (dbl),
## first_down (int), win (int), score_diff (int), secs_left (int), goforit
## (int), fkicker (chr), good (int), kneel_down (int), fdr.x (dbl), N.x
## (int), dwn.y (int), fdr.y (dbl), N.y (int), fdr (dbl)
# replicate "Count" table from part I
plays %>%
group_by(timo, timd) %>%
summarise(n = n(),
wino = sum(off == winner),
secs_left = mean(secs_left) / 60,
pct = wino / n) %>%
select(timo, timd, n) %>%
spread(timo, n) %>%
knitr::kable(., format = "markdown", caption = "Count")
timd |
0 |
1 |
2 |
3 |
0 |
NA |
NA |
NA |
2 |
1 |
2 |
1 |
10 |
16 |
2 |
1 |
16 |
57 |
315 |
3 |
2 |
34 |
342 |
2762 |
# replicate "Raw Win Rates" table from part I
plays %>%
group_by(timo, timd) %>%
summarise(n = n(),
wino = sum(off == winner),
secs_left = mean(secs_left) / 60,
pct = wino / n) %>%
filter(timo != 0, timd != 0) %>%
select(timo, timd, pct) %>%
spread(timo, pct) %>%
knitr::kable(., format = "markdown", caption = "Raw Win Rates")
timd |
1 |
2 |
3 |
1 |
1.0000000 |
0.7000000 |
0.5625000 |
2 |
0.6875000 |
0.7368421 |
0.7174603 |
3 |
0.7352941 |
0.6871345 |
0.6821144 |
# Replicate "Average Game Time" table from part I
plays %>%
group_by(timo, timd) %>%
summarise(n = n(),
wino = sum(win),
secs_left = mean(secs_left) / 60,
pct = wino / n) %>%
filter(timo != 0, timd != 0) %>%
select(timo, timd, n) %>%
spread(timo, n) %>%
knitr::kable(., format = "markdown", caption = "Average Game Time")
timd |
1 |
2 |
3 |
1 |
1 |
10 |
16 |
2 |
16 |
57 |
315 |
3 |
34 |
342 |
2762 |
# estimate model
wp_logit <- glm(win ~ yfog + score_diff + secs_left + timo + timd,
data = plays,
family = binomial(link = "logit"))
summary(wp_logit)
##
## Call:
## glm(formula = win ~ yfog + score_diff + secs_left + timo + timd,
## family = binomial(link = "logit"), data = plays)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9106 -1.2836 0.7060 0.8838 1.0931
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.221e-01 4.991e-01 1.847 0.0647 .
## yfog -1.841e-03 6.058e-03 -0.304 0.7612
## score_diff 1.684e-01 1.475e-02 11.414 <2e-16 ***
## secs_left -5.724e-05 1.488e-04 -0.385 0.7005
## timo -3.556e-02 9.606e-02 -0.370 0.7113
## timd -1.354e-01 1.074e-01 -1.261 0.2074
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4426.1 on 3559 degrees of freedom
## Residual deviance: 4287.7 on 3554 degrees of freedom
## AIC: 4299.7
##
## Number of Fisher Scoring iterations: 4
# get predicted probabilities for graphs in part II
wp_logit_pp <- data_frame(score_diff = 3,
yfog = 50,
secs_left = rep(1800:900, times = 4),
timo = 0,
timd = sort(rep(0:3, times = 901))) %>%
mutate(wp = predict(wp_logit, newdata = ., type = "response"),
wp_se = predict(wp_logit, newdata = ., type = "response", se.fit = TRUE)$se.fit)
ggplot(wp_logit_pp, aes(secs_left, wp, color = factor(timd))) +
geom_line() +
scale_x_reverse() +
scale_y_continuous(labels = scales::percent) +
labs(title = "Offense Win Probability by Time and Defensive Timeouts Left",
x = "Time Left",
y = "Win Probability",
color = "Defensive\nTimeouts Left") +
theme_bw()
wp_logit_pp %>%
filter(timd >= 2) %>%
ggplot(aes(secs_left, wp, color = factor(timd), fill = factor(timd),
ymin = wp - 1.96 * wp_se,
ymax = wp + 1.96 * wp_se)) +
geom_line() +
geom_ribbon(alpha = .2) +
scale_x_reverse() +
scale_y_continuous(labels = scales::percent) +
labs(title = "Offense Win Probability by Time and Defensive Timeouts Left",
x = "Time Left",
y = "Win Probability",
color = "Defensive\nTimeouts Left",
fill = "Defensive\nTimeouts Left") +
theme_bw()