Skip to content

Instantly share code, notes, and snippets.

@alekrutkowski
Created September 12, 2024 15:22
Show Gist options
  • Save alekrutkowski/5b3e23e8e851857ab26c10e1ce286d6b to your computer and use it in GitHub Desktop.
Save alekrutkowski/5b3e23e8e851857ab26c10e1ce286d6b to your computer and use it in GitHub Desktop.
ESSPROS social protection expenditure real growth decomposition including early estimates
renameColumns <- function(dt, ...) {
pairs <-
substitute(list(...)) %>%
as.list %>%
tail(-1) %>%
lapply(. %>% as.list %>% tail(-1) %>% rev)
from <-
pairs %>%
sapply(. %>% .[[1]] %>% as.character)
to <-
pairs %>%
sapply(. %>% .[[2]] %>% as.character)
setnames(dt, from, to)
}
renameColumnsWithFunction <- function(dt, FUN)
setnames(dt,
colnames(dt),
colnames(dt) %>% sapply(FUN))
addUp <- function(dt, colnames.)
dt %>%
.[, paste0(colnames.,collapse="") :=
Reduce(`+`,
c(mget(colnames.)))] %>%
.[, (colnames.) := NULL]
v <- function(...) # to avoid typing single or double quotes
substitute(list(...)) %>%
as.list %>%
tail(-1) %>%
as.character
library(magrittr)
library(data.table)
library(openxlsx2)
library(rvest)
source('helper_functions.R')
EU <-
v(AT,BE,BG,CY,CZ,DE,DK,EE,EL,ES,FI,FR,HR,HU,IE,
IT,LT,LU,LV,MT,NL,PL,PT,RO,SE,SI,SK,
EU27_2020)
# Import early estimates --------------------------------------------------
WEB_PAGE <-
"https://ec.europa.eu/eurostat/web/social-protection/database/early-estimates"
URL <-
WEB_PAGE %>%
read_html %>%
html_elements("a[href$='.xlsx']") %>% # Select all <a> tags with href ending in .xlsx
html_attr("href") %>% # Extract the href attribute (the link itself)
url_absolute(WEB_PAGE) # Handle relative URLs by converting them to absolute ones if needed
if (is.character(URL) && length(URL)==1 && URL!="")
message('Found Excel file URL:\n',URL) else
stop('Could not find URL of Excel file in the early estimates web page!')
LATEST_YEAR <-
read_xlsx(URL, sheet="MIO_NAC", rows=2, cols=6,
col_names=FALSE) %>%
as.integer
addUpColumns <- function(dt)
dt %>%
addUp(v(HOUSE,EXCLU)) %>%
addUp(v(OLD,SURVIV)) %>%
addUp(v(SICK,DISA)) # %>% "other" too small to report
# .[, OTHER :=
# SPBENEFNOREROUTE - HOUSEEXCLU - OLDSURVIV - SICKDISA - FAM - UNEMPLOY]
early_estim <-
URL %>%
read_xlsx(sheet="MIO_NAC", start_row=4, cols=1:10) %>%
set_colnames(colnames(.) %>%
ifelse(is.na(.),'geo_labels',.)) %>%
as.data.table %>%
.[geo_labels != 'EU27_2020'] %>% # empty EU27
rbind(
URL %>%
read_xlsx(sheet="MIO_EUR", cols=1:10, rows=4:5) %>% # not empty EU27
set_colnames(colnames(.) %>%
ifelse(is.na(.),'geo_labels',.))
) %>%
.[, geo_labels := geo_labels %>%
ifelse(grepl('EU27_2020',.),'European Union - 27 countries (from 2020)',.)] %>%
renameColumnsWithFunction(. %>%
trimws %>%
gsub('\n',"",.,fixed=TRUE) %>%
ifelse(grepl('TOTAL',.),'SPBENEFNOREROUTE',.)) %>%
renameColumns("Sickness/Health care" -> SICK,
Disability -> DISA,
"Old age" -> OLD,
Survivors -> SURVIV,
"Family/Children" -> FAM,
Unemployment -> UNEMPLOY,
Housing -> HOUSE,
"Social exclusion n.e.c." -> EXCLU) %>%
addUpColumns %>%
melt(id.vars='geo_labels', variable.name='spdeps',
value.name='value_', variable.factor=FALSE) %>%
.[, time := LATEST_YEAR] %>%
merge(importLabels('geo'), by='geo_labels') %>%
.[, geo_labels := NULL] %>%
.[geo %in% EU]
# Import time series for expenditure --------------------------------------
expend <-
importData('spr_exp_sum',
list(unit='MIO_NAC',
spdeps=v(SPBENEFNOREROUTE, SICK, DISA,
OLD, SURVIV, FAM,
UNEMPLOY, HOUSE, EXCLU))) %>%
as.data.table %>%
.[geo != 'EU27_2020'] %>%
rbind(
importData('spr_exp_sum',
list(unit='MIO_EUR',
geo='EU27_2020',
spdeps=v(SPBENEFNOREROUTE, SICK, DISA,
OLD, SURVIV, FAM,
UNEMPLOY, HOUSE, EXCLU)))
) %>%
.[, v(flags_,freq,unit) := NULL] %>%
renameColumns(TIME_PERIOD -> time) %>%
.[, time := time %>% as.character %>% as.integer] %>%
.[geo %in% EU] %>%
dcast(geo + time ~ spdeps,
fun.aggregate=identity,
value.var='value_',
fill=NA_real_) %>%
addUpColumns %>%
melt(id.vars=v(geo,time), variable.name='spdeps',
value.name='value_', variable.factor=FALSE)
stacked_expend <-
rbind(expend, early_estim, fill=TRUE)
# Import HICP -------------------------------------------------------------
hicp_data <-
importData('prc_hicp_aind',
list(coicop='CP00', # All-items HICP
unit='INX_A_AVG' # Annual average index
)) %>%
as.data.table %>%
.[, hicp := value_/100] %>%
.[, v(flags_,freq,coicop,unit,value_) := NULL] %>%
renameColumns(TIME_PERIOD -> time) %>%
.[, time := time %>% as.character %>% as.integer]
# Real growth decomposition -----------------------------------------------
DATA <-
merge(stacked_expend, hicp_data,
by=v(geo,time))
decomposeGrowth <- function(dt)
copy(dt) %>%
.[, real_value := value_/hicp] %>%
.[, D_real_value := collapse::D(real_value, t=time)
, by = .(geo,spdeps)] %>%
.[, D_real_value_TOTAL :=
D_real_value[spdeps=='SPBENEFNOREROUTE']
, by=.(geo,time)] %>% # across spdeps components
.[, D_real_value_percent := 100*D_real_value/collapse::L(real_value, t=time)
, by = .(geo,spdeps)] %>%
.[, D_real_value_percent_TOTAL :=
D_real_value_percent[spdeps=='SPBENEFNOREROUTE']
, by=.(geo,time)] %>% # across spdeps components
.[, D_real_value_percentage_points :=
ifelse(D_real_value_TOTAL==0, 0,
D_real_value_percent_TOTAL * D_real_value / D_real_value_TOTAL)] %>%
.[, spdeps := spdeps %>% ifelse(.=='SPBENEFNOREROUTE','TOTAL',.)]
componentsToColumns <- function(dt)
dt %>%
dcast(geo + time ~ spdeps,
fun.aggregate=identity,
value.var='D_real_value_percentage_points',
fill=NA_real_) %>%
setcolorder('TOTAL',after=ncol(.))
EU_Agg <-
DATA %>%
.[geo=='EU27_2020'] %>%
decomposeGrowth %>%
componentsToColumns
MS_LongTerm <-
DATA %>%
.[geo != 'EU27_2020'] %>%
.[time==LATEST_YEAR | time==LATEST_YEAR-6] %>%
.[, time := time %>% ifelse(.!=max(.), max(.)-1, .)] %>%
decomposeGrowth %>%
componentsToColumns %>%
.[time==LATEST_YEAR] %>%
.[, time := NULL] %>%
setorder(-TOTAL)
MS_ShortTerm <-
DATA %>%
.[geo != 'EU27_2020'] %>%
.[time==LATEST_YEAR | time==LATEST_YEAR-1] %>%
decomposeGrowth %>%
componentsToColumns %>%
.[time==LATEST_YEAR] %>%
.[, time := NULL] %>%
setorder(-TOTAL)
list(EU_Agg, MS_LongTerm, MS_ShortTerm) %>%
set_names(c('EU_Agg',
paste('MS',LATEST_YEAR-6,LATEST_YEAR,sep='_'),
paste('MS',LATEST_YEAR-1,LATEST_YEAR,sep='_'))) %>%
write_xlsx('ESSPROS social protection expenditure decomposition with early estimates.xlsx')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment