Created
February 23, 2016 07:19
-
-
Save chrswt/7c297c6a5f2077dee0b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "NBA Daily Fantasy"
output: pdf_document
---
```{r}
library(plyr) | |
library(TTR) | |
library(tree) | |
# Box-score columns for which a trailing moving average is computed
# for every player (see the mov_avg() call in the per-player pipeline).
mv_avg_stats <- c(
  "MIN",
  "FG", "FGA", "X3P", "X3PA", "FT", "FTA",
  "OR", "DR", "TOT",
  "A", "PF", "ST", "TO", "BL", "PTS"
)
# Build the column names used for trailing moving averages.
#
# Args:
#   stats: character vector of box-score column names (e.g. "PTS").
#   k:     window size that is embedded in the generated name.
#
# Returns:
#   A character vector of the form "<stat>_avg_past_<k>", one per element
#   of `stats`.
mv_avg_colnames <- function(stats, k) {
  paste0(stats, "_avg_past_", k)
}
# Number of previous games requested for each trailing moving average.
mv_avg_window <- 20

# TODO Match up player and team datasets.

# stringsAsFactors = TRUE pins the read.csv() default that was in effect
# before R 4.0.0, so text columns (team, opponent, position, venue) are
# loaded as factors regardless of the R version running this script.
# NOTE(review): the downstream tree()/lm() fits presumably rely on these
# columns being factors -- confirm against the installed 'tree' version.
nba_player <- read.csv("2016/player.csv", stringsAsFactors = TRUE)
# DATE is stored as month/day/four-digit-year text; parse it straight to
# Date (as.character() makes this work for both factor and character input).
nba_player$DATE <- as.Date(as.character(nba_player$DATE), format = "%m/%d/%Y")

nba_team <- read.csv("2016/team.csv", stringsAsFactors = TRUE)
nba_team$DATE <- as.Date(as.character(nba_team$DATE), format = "%m/%d/%Y")
# Append trailing moving-average columns to a data frame.
#
# For each name in `stats`, the column of `source_df` with that name is
# averaged over the previous games only: the weight vector gives weight 1
# to each of the `n` preceding rows and weight 0 to the current row, so the
# current game's value never enters its own feature.
#
# Args:
#   dest_df:   data frame that receives the new columns; expected to have
#              the same number of rows, in the same order, as `source_df`.
#   source_df: data frame holding the raw stat columns, already sorted in
#              the order the moving average should run over.
#   stats:     character vector of column names in `source_df`.
#   k:         requested window size; capped at nrow(source_df) - 1.
#
# Returns:
#   `dest_df` with one extra column per stat, named by mv_avg_colnames().
#   Rows without a full window of history are NA.
mov_avg <- function(dest_df, source_df, stats, k) {
  dest_colnames <- mv_avg_colnames(stats, k)
  n <- min(k, nrow(source_df) - 1)

  # With fewer than two rows there is no history at all. The weight vector
  # would then be all zeros (a 0/0 average) or, for an empty input, could
  # not even be built, so emit explicit NAs instead.
  if (n < 1) {
    for (col_name in dest_colnames) {
      dest_df[[col_name]] <- rep(NA_real_, nrow(dest_df))
    }
    return(dest_df)
  }

  # Weight of current stat is 0 as we only want historical stats.
  # The weights do not depend on the stat, so build them once.
  wts <- c(rep(1, n), 0)
  for (i in seq_along(stats)) {
    dest_df[, dest_colnames[i]] <- WMA(source_df[[stats[i]]],
                                       n = n + 1, wts = wts)
  }
  dest_df
}
# Build one training row per player-game: context features, trailing
# moving averages of the player's own box-score stats, and the response
# variables taken from the game itself.
indiv_train <- ddply(nba_player, .(PLAYER.FULL.NAME), function(games) {
  # Chronological order is required for the moving averages and days_rest.
  games <- games[order(games$DATE), ]

  # Features
  features <- data.frame(venue = games$VENUE..R.H., date = games$DATE)
  context_cols <- c(team = "OWN.TEAM", opp = "OPP.TEAM", pos = "POSITION")
  for (feature_name in names(context_cols)) {
    features[[feature_name]] <- games[[context_cols[[feature_name]]]]
  }
  # Gap to the player's previous game; the first game gets 0.
  features$days_rest <- c(0, diff(games$DATE))
  features <- mov_avg(features, games, mv_avg_stats, mv_avg_window)

  # Response variables
  response_cols <- c(
    resp_pts = "PTS",
    resp_3pts = "X3P",
    resp_rebounds = "TOT",
    resp_assists = "A",
    resp_steals = "ST",
    resp_blocks = "BL",
    resp_turnovers = "TO"
  )
  for (response_name in names(response_cols)) {
    features[[response_name]] <- games[[response_cols[[response_name]]]]
  }
  # Number of categories in which the player reached at least 10.
  double_stats <- c("PTS", "TOT", "A", "ST", "BL")
  features$resp_doubles <- rowSums(games[, double_stats] >= 10)

  features
})

# The moving averages are NA wherever a player lacks a full window of
# earlier games; drop those rows.
indiv_train <- na.omit(indiv_train)
# Get aggregate stats of other players.
#
# For every player-game, finds the rows of `source_train` that belong to
# *other* players and match on the join keys, and appends the mean of their
# trailing averages (and days of rest), weighted by each of those players'
# trailing average minutes.
#
# Args:
#   curr_train:   data frame to extend; must contain PLAYER.FULL.NAME and
#                 the x-side key columns.
#   join_x_only:  key column(s) used only on the focal player's side.
#   join_y_only:  key column(s) used only on the other players' side.
#   join_both:    key column(s) shared by both sides.
#   source_train: per-player rows that supply both the focal keys and the
#                 other players' stats. Defaults to the global
#                 `indiv_train`, which is what this function previously
#                 read implicitly.
#
# Returns:
#   The inner merge of `curr_train` with the aggregated stats. Player-games
#   with no matching other player are dropped, and column names present on
#   both sides get merge()'s default ".x"/".y" suffixes.
add_players_stats <- function(curr_train, join_x_only, join_y_only, join_both,
                              source_train = indiv_train) {
  join_x <- c(join_x_only, join_both)
  join_y <- c(join_y_only, join_both)
  id_frame <- source_train[, c("PLAYER.FULL.NAME", join_x)]

  # merge() names the key columns after the x side. An x-side key that is
  # not a key on the y side (e.g. "team" when joining team -> opp) would
  # otherwise also arrive as an ordinary y column and be duplicated, so it
  # is removed from the y frame first. A logical mask is used because
  # negative indexing with an empty which() result would drop every column.
  dup_cols <- setdiff(join_x, join_y)
  join_frame <- source_train[, !(names(source_train) %in% dup_cols),
                             drop = FALSE]

  other_train <- merge(id_frame, join_frame, by.x = join_x, by.y = join_y)
  # Keep only pairings of a player with somebody else.
  is_other <- other_train$PLAYER.FULL.NAME.x != other_train$PLAYER.FULL.NAME.y
  other_train <- other_train[is_other, ]

  # Everything except minutes is aggregated; minutes act as the weights.
  other_stats <- c("days_rest",
                   mv_avg_colnames(setdiff(mv_avg_stats, "MIN"),
                                   mv_avg_window))
  weight_col <- mv_avg_colnames("MIN", mv_avg_window)

  other_train <- ddply(
    other_train, c("PLAYER.FULL.NAME.x", join_x),
    function(df) {
      vapply(df[other_stats], weighted.mean, numeric(1),
             w = df[[weight_col]])
    }
  )

  merge(curr_train, other_train,
        by.x = c("PLAYER.FULL.NAME", join_x),
        by.y = c("PLAYER.FULL.NAME.x", join_x))
}
# Attach teammate aggregates (same team, same date), then opponent
# aggregates (rows whose `opp` equals the player's team on the same date).
all_train <- add_players_stats(
  curr_train = indiv_train,
  join_x_only = "team", join_y_only = "team", join_both = "date"
)
all_train <- add_players_stats(
  curr_train = all_train,
  join_x_only = "team", join_y_only = "opp", join_both = "date"
)

# TODO Verify that join is correct.
team_agg_stats <- c("TEAMS", "DATE", "POSS", "PACE", "OEFF", "DEFF")
team_agg_frame <- nba_team[team_agg_stats]

# Merge the team-level aggregates twice: first keyed on the player's own
# team, then on the opponent. The second merge collides with the columns
# added by the first, so merge() suffixes them with ".x" and ".y".
all_train <- Reduce(
  function(acc, side) {
    merge(acc, team_agg_frame,
          by.x = c(side, "date"), by.y = c("TEAMS", "DATE"))
  },
  c("team", "opp"),
  all_train
)
# Regression tree for points scored. Identifier columns and every other
# response variable are removed from the predictor set.
tree.pts <- tree(
  resp_pts ~ . - team - PLAYER.FULL.NAME - date -
    resp_3pts - resp_rebounds - resp_assists - resp_steals -
    resp_blocks - resp_turnovers - resp_doubles,
  data = all_train,
  mindev = 0.001
)

# Cross-validate with a fixed seed so the chosen size is reproducible.
set.seed(1337)
pts.cv <- cv.tree(tree.pts)

# Among the sizes that reach the minimum cross-validated deviance, prune
# to the one with the fewest leaves.
opt.tree <- which(pts.cv$dev <= min(pts.cv$dev))
best.leaves <- min(pts.cv$size[opt.tree])
pts.pruned <- prune.tree(tree.pts, best = best.leaves)

infos <- summary(pts.pruned)
plot(pts.pruned)
text(pts.pruned)

# Square root of the pruned tree's deviance over its degrees of freedom,
# both taken from the summary.
sqrt(infos[["dev"]] / infos[["df"]])
# Linear model with the same predictor set as the tree above. Note that,
# despite the variable name, the response here is resp_doubles, not
# resp_pts.
lm.pts <- lm(
  resp_doubles ~ . - team - PLAYER.FULL.NAME - date -
    resp_pts - resp_3pts - resp_rebounds - resp_assists -
    resp_steals - resp_blocks - resp_turnovers,
  data = all_train
)

# Residual standard error of the fit.
summary(lm.pts)[["sigma"]]
# Neural networks | |
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment