Skip to content

Instantly share code, notes, and snippets.

@iangow
Created September 6, 2024 04:25
Show Gist options
  • Save iangow/17d0177444437e055a22194d716daba7 to your computer and use it in GitHub Desktop.
Save iangow/17d0177444437e055a22194d716daba7 to your computer and use it in GitHub Desktop.
# Code to get player lists from AFL Tables (afltables.com)
library(httr2) # request(), req_*(), resp_body_html()
library(rvest) # html_elements(), html_table()
library(tidyverse)
#' Parse team ids and names out of anchor-tag HTML.
#'
#' @param link_text Character vector of HTML snippets, each expected to
#'   contain an anchor of the form
#'   `<a href="teams/<id>_idx.html"><name></a>`.
#' @return A tibble with one row per element of `link_text` and columns
#'   `team_id` and `team_name`. Elements without a matching anchor give `NA`
#'   in both columns.
extract_team_ids <- function(link_text) {
  # Lazy quantifiers keep each capture inside the first matching anchor when
  # a snippet holds several links (greedy `.*` would run on to the last
  # `</a>`). The dot in ".html" is escaped so it only matches a literal dot.
  link_pattern <- '<a href="teams/(.*?)_idx\\.html">(.*?)</a>'
  matches <- str_match(link_text, link_pattern)

  # Column 1 of the match matrix is the full match; 2 and 3 are the groups.
  tibble(team_id = matches[, 2], team_name = matches[, 3])
}
# Index page that links to each team's page on AFL Tables.
all_teams_url <- "https://afltables.com/afl/afl_index.html"

# Serialise every anchor on the index page back to HTML text, which is the
# input format `extract_team_ids()` parses. Without this step `team_urls` is
# never defined and the script stops with "object 'team_urls' not found".
team_urls <-
  request(all_teams_url) |>
  req_user_agent(getOption("HTTPUserAgent")) |>
  req_perform() |>
  resp_body_html() |>
  html_elements("a") |>
  as.character()

# One row per team. Anchors that are not team links parse to NA and are
# dropped, along with the "allteams" entry (presumably a combined page rather
# than a club -- confirm against the site). `distinct()` guards against the
# same team being linked more than once on the index page.
teams <-
  team_urls |>
  map(extract_team_ids) |>
  list_rbind() |>
  filter(!is.na(team_id), team_id != "allteams") |>
  distinct(team_id, .keep_all = TRUE)
#' Download one team's all-time player list from AFL Tables.
#'
#' @param team_id Team identifier used in the page file name; the page
#'   fetched is `https://afltables.com/afl/stats/alltime/<team_id>.html`.
#' @return A tibble with one row per player that has a non-missing `DOB`,
#'   sorted by `last` in descending order. `team_id` is the first column, and
#'   the scraped `Debut` / `Last` age columns (text of the form
#'   `<years>y <days>d`) are replaced by the Date columns `debut` and `last`.
get_team_list <- function(team_id) {
  url <- str_c("https://afltables.com/afl/stats/alltime/", team_id, ".html")

  page_body <-
    request(url) |>
    req_user_agent(getOption("HTTPUserAgent")) |>
    req_perform() |>
    resp_body_html() |>
    html_elements("body")

  # Date on which someone born on `dob` reached the given age. The age parts
  # arrive as text from the regex split, so they are converted to integers;
  # a missing day count is treated as zero days.
  date_at_age <- function(dob, age_years, age_days) {
    whole_days <- coalesce(as.integer(age_days), 0L)
    dob + years(as.integer(age_years)) + days(whole_days)
  }

  page_body[[1]] |>
    html_table() |>
    mutate(DOB = as.Date(DOB)) |>
    filter(!is.na(DOB)) |>
    # Split each age into `<col>_years` / `<col>_days`; `align_start` leaves
    # the days part NA when the text stops after the years.
    separate_wider_regex(
      cols = c(Debut, Last),
      patterns = c(years = "^[0-9]+", "y ", days = "[0-9]+", "d$"),
      names_sep = "_",
      too_few = "align_start"
    ) |>
    # Each date uses its own day count; previously `last` was selected by
    # testing `Debut_days`, which mixed up the two columns.
    mutate(
      debut = date_at_age(DOB, Debut_years, Debut_days),
      last = date_at_age(DOB, Last_years, Last_days)
    ) |>
    arrange(desc(last)) |>
    # Case-sensitive match: drops the capitalised helper columns but keeps the
    # lower-case `debut` and `last` created above.
    select(-matches("^(Debut|Last)", ignore.case = FALSE)) |>
    mutate(team_id = team_id, .before = 1)
}
# Fetch the player list for every team and stack the results row-wise into a
# single data frame.
team_lists <-
  teams |>
  pull(team_id) |>
  map(get_team_list) |>
  list_rbind()
# Expand the `Seasons` text of each player row into one row per season.
seasons <-
  team_lists |>
  select(team_id, Player, DOB, Seasons) |>
  # `Seasons` may list several comma-separated entries; give each its own row.
  separate_longer_delim(Seasons, ",") |>
  mutate(Seasons = str_trim(Seasons)) |>
  # An entry is either a single year or a "first-last" range; for a single
  # year `align_start` leaves `last_season` as NA.
  separate_wider_regex(
    Seasons,
    patterns = c(first_season = "^[0-9]+", "-", last_season = "[0-9]+$"),
    too_few = "align_start"
  ) |>
  # Convert explicitly rather than relying on `:` to coerce text to numbers.
  mutate(
    first_season = as.integer(first_season),
    last_season = coalesce(as.integer(last_season), first_season)
  ) |>
  # An entry with no parsable year would otherwise abort the sequence step
  # with an NA-argument error.
  filter(!is.na(first_season)) |>
  mutate(seasons = map2(first_season, last_season, seq)) |>
  unnest(seasons) |>
  select(-first_season, -last_season)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment