Created
September 12, 2024 15:22
-
-
Save alekrutkowski/5b3e23e8e851857ab26c10e1ce286d6b to your computer and use it in GitHub Desktop.
ESSPROS social protection expenditure real growth decomposition including early estimates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
renameColumns <- function(dt, ...) { | |
pairs <- | |
substitute(list(...)) %>% | |
as.list %>% | |
tail(-1) %>% | |
lapply(. %>% as.list %>% tail(-1) %>% rev) | |
from <- | |
pairs %>% | |
sapply(. %>% .[[1]] %>% as.character) | |
to <- | |
pairs %>% | |
sapply(. %>% .[[2]] %>% as.character) | |
setnames(dt, from, to) | |
} | |
renameColumnsWithFunction <- function(dt, FUN) | |
setnames(dt, | |
colnames(dt), | |
colnames(dt) %>% sapply(FUN)) | |
addUp <- function(dt, colnames.) | |
dt %>% | |
.[, paste0(colnames.,collapse="") := | |
Reduce(`+`, | |
c(mget(colnames.)))] %>% | |
.[, (colnames.) := NULL] | |
v <- function(...) # to avoid typing single or double quotes | |
substitute(list(...)) %>% | |
as.list %>% | |
tail(-1) %>% | |
as.character |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(magrittr) | |
library(data.table) | |
library(openxlsx2) | |
library(rvest) | |
source('helper_functions.R') | |
EU <- | |
v(AT,BE,BG,CY,CZ,DE,DK,EE,EL,ES,FI,FR,HR,HU,IE, | |
IT,LT,LU,LV,MT,NL,PL,PT,RO,SE,SI,SK, | |
EU27_2020) | |
# Import early estimates -------------------------------------------------- | |
WEB_PAGE <- | |
"https://ec.europa.eu/eurostat/web/social-protection/database/early-estimates" | |
URL <- | |
WEB_PAGE %>% | |
read_html %>% | |
html_elements("a[href$='.xlsx']") %>% # Select all <a> tags with href ending in .xlsx | |
html_attr("href") %>% # Extract the href attribute (the link itself) | |
url_absolute(WEB_PAGE) # Handle relative URLs by converting them to absolute ones if needed | |
if (is.character(URL) && length(URL)==1 && URL!="") | |
message('Found Excel file URL:\n',URL) else | |
stop('Could not find URL of Excel file in the early estimates web page!') | |
LATEST_YEAR <- | |
read_xlsx(URL, sheet="MIO_NAC", rows=2, cols=6, | |
col_names=FALSE) %>% | |
as.integer | |
addUpColumns <- function(dt) | |
dt %>% | |
addUp(v(HOUSE,EXCLU)) %>% | |
addUp(v(OLD,SURVIV)) %>% | |
addUp(v(SICK,DISA)) # %>% "other" too small to report | |
# .[, OTHER := | |
# SPBENEFNOREROUTE - HOUSEEXCLU - OLDSURVIV - SICKDISA - FAM - UNEMPLOY] | |
early_estim <- | |
URL %>% | |
read_xlsx(sheet="MIO_NAC", start_row=4, cols=1:10) %>% | |
set_colnames(colnames(.) %>% | |
ifelse(is.na(.),'geo_labels',.)) %>% | |
as.data.table %>% | |
.[geo_labels != 'EU27_2020'] %>% # empty EU27 | |
rbind( | |
URL %>% | |
read_xlsx(sheet="MIO_EUR", cols=1:10, rows=4:5) %>% # not empty EU27 | |
set_colnames(colnames(.) %>% | |
ifelse(is.na(.),'geo_labels',.)) | |
) %>% | |
.[, geo_labels := geo_labels %>% | |
ifelse(grepl('EU27_2020',.),'European Union - 27 countries (from 2020)',.)] %>% | |
renameColumnsWithFunction(. %>% | |
trimws %>% | |
gsub('\n',"",.,fixed=TRUE) %>% | |
ifelse(grepl('TOTAL',.),'SPBENEFNOREROUTE',.)) %>% | |
renameColumns("Sickness/Health care" -> SICK, | |
Disability -> DISA, | |
"Old age" -> OLD, | |
Survivors -> SURVIV, | |
"Family/Children" -> FAM, | |
Unemployment -> UNEMPLOY, | |
Housing -> HOUSE, | |
"Social exclusion n.e.c." -> EXCLU) %>% | |
addUpColumns %>% | |
melt(id.vars='geo_labels', variable.name='spdeps', | |
value.name='value_', variable.factor=FALSE) %>% | |
.[, time := LATEST_YEAR] %>% | |
merge(importLabels('geo'), by='geo_labels') %>% | |
.[, geo_labels := NULL] %>% | |
.[geo %in% EU] | |
# Import time series for expenditure -------------------------------------- | |
expend <- | |
importData('spr_exp_sum', | |
list(unit='MIO_NAC', | |
spdeps=v(SPBENEFNOREROUTE, SICK, DISA, | |
OLD, SURVIV, FAM, | |
UNEMPLOY, HOUSE, EXCLU))) %>% | |
as.data.table %>% | |
.[geo != 'EU27_2020'] %>% | |
rbind( | |
importData('spr_exp_sum', | |
list(unit='MIO_EUR', | |
geo='EU27_2020', | |
spdeps=v(SPBENEFNOREROUTE, SICK, DISA, | |
OLD, SURVIV, FAM, | |
UNEMPLOY, HOUSE, EXCLU))) | |
) %>% | |
.[, v(flags_,freq,unit) := NULL] %>% | |
renameColumns(TIME_PERIOD -> time) %>% | |
.[, time := time %>% as.character %>% as.integer] %>% | |
.[geo %in% EU] %>% | |
dcast(geo + time ~ spdeps, | |
fun.aggregate=identity, | |
value.var='value_', | |
fill=NA_real_) %>% | |
addUpColumns %>% | |
melt(id.vars=v(geo,time), variable.name='spdeps', | |
value.name='value_', variable.factor=FALSE) | |
stacked_expend <- | |
rbind(expend, early_estim, fill=TRUE) | |
# Import HICP ------------------------------------------------------------- | |
hicp_data <- | |
importData('prc_hicp_aind', | |
list(coicop='CP00', # All-items HICP | |
unit='INX_A_AVG' # Annual average index | |
)) %>% | |
as.data.table %>% | |
.[, hicp := value_/100] %>% | |
.[, v(flags_,freq,coicop,unit,value_) := NULL] %>% | |
renameColumns(TIME_PERIOD -> time) %>% | |
.[, time := time %>% as.character %>% as.integer] | |
# Real growth decomposition ----------------------------------------------- | |
DATA <- | |
merge(stacked_expend, hicp_data, | |
by=v(geo,time)) | |
decomposeGrowth <- function(dt) | |
copy(dt) %>% | |
.[, real_value := value_/hicp] %>% | |
.[, D_real_value := collapse::D(real_value, t=time) | |
, by = .(geo,spdeps)] %>% | |
.[, D_real_value_TOTAL := | |
D_real_value[spdeps=='SPBENEFNOREROUTE'] | |
, by=.(geo,time)] %>% # across spdeps components | |
.[, D_real_value_percent := 100*D_real_value/collapse::L(real_value, t=time) | |
, by = .(geo,spdeps)] %>% | |
.[, D_real_value_percent_TOTAL := | |
D_real_value_percent[spdeps=='SPBENEFNOREROUTE'] | |
, by=.(geo,time)] %>% # across spdeps components | |
.[, D_real_value_percentage_points := | |
ifelse(D_real_value_TOTAL==0, 0, | |
D_real_value_percent_TOTAL * D_real_value / D_real_value_TOTAL)] %>% | |
.[, spdeps := spdeps %>% ifelse(.=='SPBENEFNOREROUTE','TOTAL',.)] | |
componentsToColumns <- function(dt) | |
dt %>% | |
dcast(geo + time ~ spdeps, | |
fun.aggregate=identity, | |
value.var='D_real_value_percentage_points', | |
fill=NA_real_) %>% | |
setcolorder('TOTAL',after=ncol(.)) | |
EU_Agg <- | |
DATA %>% | |
.[geo=='EU27_2020'] %>% | |
decomposeGrowth %>% | |
componentsToColumns | |
MS_LongTerm <- | |
DATA %>% | |
.[geo != 'EU27_2020'] %>% | |
.[time==LATEST_YEAR | time==LATEST_YEAR-6] %>% | |
.[, time := time %>% ifelse(.!=max(.), max(.)-1, .)] %>% | |
decomposeGrowth %>% | |
componentsToColumns %>% | |
.[time==LATEST_YEAR] %>% | |
.[, time := NULL] %>% | |
setorder(-TOTAL) | |
MS_ShortTerm <- | |
DATA %>% | |
.[geo != 'EU27_2020'] %>% | |
.[time==LATEST_YEAR | time==LATEST_YEAR-1] %>% | |
decomposeGrowth %>% | |
componentsToColumns %>% | |
.[time==LATEST_YEAR] %>% | |
.[, time := NULL] %>% | |
setorder(-TOTAL) | |
list(EU_Agg, MS_LongTerm, MS_ShortTerm) %>% | |
set_names(c('EU_Agg', | |
paste('MS',LATEST_YEAR-6,LATEST_YEAR,sep='_'), | |
paste('MS',LATEST_YEAR-1,LATEST_YEAR,sep='_'))) %>% | |
write_xlsx('ESSPROS social protection expenditure decomposition with early estimates.xlsx') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment