# Raw data pull
library(StatsBombR)
library(worldfootballR)
library(dplyr)
library(janitor)
library(tibble)
# StatsBomb: list the free competitions and keep only the 2023 Women's
# World Cup entry.
sb_free_comps <- FreeCompetitions()
sb_comp <- sb_free_comps |>
  filter(
    competition_name == "Women's World Cup" & season_name == "2023"
  )

# Matches belonging to the selected competition/season.
sb_matches <- FreeMatches(sb_comp)

# Pull every event for those matches, normalise the column names to
# snake_case, and keep only the events that carry a StatsBomb xG value
# (presumably the shot events).
sb_shots <- free_allevents(MatchesDF = sb_matches) |>
  as_tibble() |>
  clean_names() |>
  filter(!is.na(shot_statsbomb_xg))
# FBref: collect the match URLs for the 2023 Women's World Cup. This is a
# non-domestic competition, so `country` and `tier` are left blank and the
# competition history page is passed instead.
# The result is stored under the same name as the function that produced it,
# so the call is namespace-qualified to make clear which is which.
fb_match_urls <- worldfootballR::fb_match_urls(
  non_dom_league_url = "https://fbref.com/en/comps/106/history/Womens-World-Cup-Seasons",
  season_end_year = 2023,
  gender = "F",
  country = "",
  tier = ""
)

# Shot-level data for each match, as a tibble with snake_case column names.
opta_shots <- fb_match_urls |>
  fb_match_shooting() |>
  as_tibble() |>
  clean_names()
# Extract non-penalty shots and bring the data sets together.
# StatsBomb non-penalty shots, reduced to two columns:
#   xg - the StatsBomb xG of the shot
#   g  - TRUE when the shot outcome is "Goal"
sb_np_shots <- sb_shots |>
  filter(shot_type_name != "Penalty") |>
  mutate(
    xg = shot_statsbomb_xg,
    g = shot_outcome_name == "Goal"
  ) |>
  select(xg, g)
# Opta (FBref) non-penalty shots, reduced to the same two columns as the
# StatsBomb table:
#   xg - the shot's xG as a number
#   g  - TRUE when the shot outcome is "Goal"
opta_np_shots <- opta_shots |>
  # Coerce x_g to numeric so it can be compared as a number below.
  mutate(x_g = as.numeric(x_g)) |>
  # Heuristic for penalties: treat shots recorded at distance "13" with an
  # xG of 0.79 as penalties and drop them. near() compares with a small
  # tolerance rather than relying on exact floating-point equality.
  # NOTE(review): filter() also drops rows for which this condition is NA
  # (e.g. a missing x_g at distance "13") - confirm that is acceptable.
  filter(
    !(distance == "13" & near(x_g, 0.79))
  ) |>
  transmute(
    xg = x_g,
    g = outcome == "Goal"
  )
# Stack the two non-penalty shot tables, tagging every row with the data
# provider it came from.
np_shots <- bind_rows(
  mutate(sb_np_shots, source = "StatsBomb"),
  mutate(opta_np_shots, source = "Opta")
)