Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active September 6, 2024 19:32
Show Gist options
  • Save USMortality/edd7db8a38e575cab1084c797221f680 to your computer and use it in GitHub Desktop.
Save USMortality/edd7db8a38e575cab1084c797221f680 to your computer and use it in GitHub Desktop.
Crude Mortality Rate by Age Group [Singapore]
library(readr)
library(tidyr)
library(ggplot2)
library(dplyr)
library(fable)
sf <- 2
width <- 900 * sf
height <- 450 * sf
options(vsc.dev.args = list(width = width, height = height, res = 72 * sf))
parse_age_groups <- function(df) {
df$age_group <- sub(" Under 1 Year", "0", df$age_group)
df$age_group <- sub(" 1 - 4 Years", "1-4", df$age_group)
df$age_group <- sub(" 5 - 9 Years", "5-9", df$age_group)
df$age_group <- sub(" 10 - 14 Years", "10-14", df$age_group)
df$age_group <- sub(" 15 - 19 Years", "15-19", df$age_group)
df$age_group <- sub(" 20 - 24 Years", "20-24", df$age_group)
df$age_group <- sub(" 25 - 29 Years", "25-29", df$age_group)
df$age_group <- sub(" 30 - 34 Years", "30-34", df$age_group)
df$age_group <- sub(" 35 - 39 Years", "35-39", df$age_group)
df$age_group <- sub(" 40 - 44 Years", "40-44", df$age_group)
df$age_group <- sub(" 45 - 49 Years", "45-49", df$age_group)
df$age_group <- sub(" 50 - 54 Years", "50-54", df$age_group)
df$age_group <- sub(" 55 - 59 Years", "55-59", df$age_group)
df$age_group <- sub(" 60 - 64 Years", "60-64", df$age_group)
df$age_group <- sub(" 65 - 69 Years", "65-69", df$age_group)
df$age_group <- sub(" 70 - 74 Years", "70-74", df$age_group)
df$age_group <- sub(" 75 - 79 Years", "75-79", df$age_group)
df$age_group <- sub(" 80 - 84 Years", "80-84", df$age_group)
df$age_group <- sub(" 85 - 89 Years", "85-89", df$age_group)
df$age_group <- sub(" 90 Years & Over", "90+", df$age_group)
df |> filter(!age_group %in% c(
" 70 Years & Over", " 75 Years & Over",
" 80 Years & Over", " 85 Years & Over"
))
}
# DL Data and cleanup
df <- read_delim(
"https://apify.mortality.watch/singstat-ts-M810141.csv",
delim = ",", skip = 10,
col_types = cols(.default = col_double(), `Data Series` = col_character())
) |> head(25)
ts <- df |>
pivot_longer(cols = 2:ncol(df), names_to = "year", values_to = "mortality") |>
setNames(c("age_group", "year", "mortality")) |>
filter(age_group != "Total Age Specific Death Rate", year >= 2000) |>
parse_age_groups() |>
mutate(
year = as.integer(trimws(year)),
# Convert to factor
age_group = factor(age_group,
levels = c(
"0", "1-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34",
"35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69",
"70-74", "75-79", "80-84", "85-89", "90+"
)
)
) |>
as_tsibble(index = year, key = age_group)
# Fit a linear model by age_group for the years 2010 to 2020
model <- ts |>
filter(year >= 2010 & year <= 2019) |>
model(lm = TSLM(mortality ~ year))
# Get the fitted values for the pre-2020 period using augment()
fitted_values <- model |>
augment() |>
select(year, age_group, .fitted)
# Get the forecasted values with prediction intervals for the next 4 years
forecasts <- model |>
forecast(h = "5 years", level = 95) |>
mutate(hl = hilo(mortality, level = 95)) |>
unpack_hilo(cols = hl) |>
as_tibble() |>
select(year, age_group, .mean, hl_lower, hl_upper)
# Combine the fitted values with the original data for the pre-2020 period
ts_augmented <- ts |>
left_join(fitted_values, by = c("year", "age_group"))
# Combine the forecast values with the dataset
ts_with_forecast <- ts_augmented |>
left_join(forecasts, by = c("year", "age_group"))
chart <- ggplot(ts_with_forecast, aes(x = year)) +
geom_line(aes(y = mortality), color = "red") +
geom_line(aes(y = .fitted), color = "black", linetype = "dashed") +
geom_line(aes(y = .mean), color = "black", linetype = "dashed") +
geom_ribbon(aes(ymin = `hl_lower`, ymax = `hl_upper`),
fill = "black", alpha = 0.2
) +
labs(
x = "Year", y = "Mortality",
title = "Crude Mortality Rate by Age Group [Singapore]",
subtitle = paste(c(
"Baseline: Lin. Regr. 2010-2019",
"Source: https://tablebuilder.singstat.gov.sg/table/TS/M810141"
), collapse = " · "),
color = "Year"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(vars(age_group), scales = "free")
ggplot2::ggsave(
filename = "chart.png",
plot = chart,
width = width,
height = height,
units = "px",
dpi = 72 * sf,
device = grDevices::png,
type = c("cairo")
)
# Excess
excess <- ts_with_forecast |>
filter(
age_group %in% c(
"50-54", "55-59", "60-64", "65-69", "70-74",
"75-79", "80-84", "85-89", "90+"
),
year >= 2010
) |>
mutate(excess = mortality / (ifelse(is.na(.mean), .fitted, .mean)) - 1)
chart <- ggplot(excess, aes(x = year)) +
geom_col(aes(y = excess), fill = "red") +
geom_text(aes(y = excess, label = scales::percent(excess, accuracy = 1)),
vjust = -0.5, size = 3
) +
labs(
x = "Year", y = "Mortality",
title = "Relative Excess Mortality by Age Group [Singapore]",
subtitle = paste(c(
"Baseline: Lin. Regr. 2010-2019",
"Source: https://tablebuilder.singstat.gov.sg/table/TS/M810141"
), collapse = " · "),
color = "Year"
) +
scale_y_continuous(labels = scales::percent_format()) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_wrap(vars(age_group))
ggplot2::ggsave(
filename = "chart2.png",
plot = chart,
width = width,
height = height,
units = "px",
dpi = 72 * sf,
device = grDevices::png,
type = c("cairo")
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment