Last active
July 24, 2024 09:08
-
-
Save alekrutkowski/c2dc5669f0795bccfd92dcccdef925a8 to your computer and use it in GitHub Desktop.
Stata command to download and import a Eurostat dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Usage example: | |
// Either the parameter is Eurostat's "online data code": | |
// insheet_eurostat nama_10_gdp | |
// or the parameter is the custom URL generated with Eurostat GUI top right corner: | |
// Go to e.g. https://ec.europa.eu/eurostat/databrowser/view/nama_10_gdp/default/table?lang=en and | |
// do your selections by clicking first on the circle with plus (+) and then click | |
// ⤓ Download > ⚙ Options and other formats > Select file format: Text (.tsv, .csv), and Select: Data scope = "Data on this page only" and Compressed = No > Click: [⧉ Copy Link] button | |
// insheet_eurostat https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/nama_10_gdp/A.CP_MEUR.B1G+P3.EU27_2020+BE+BG+CZ+DK/?format=TSV&startPeriod=2021&endPeriod=2023 | |
// To use it like other commands, put it in your "personal" folder/directory | |
// (see: https://www.stata.com/support/faqs/programming/personal-ado-directory/ and https://www.stata.com/help.cgi?personal). | |
prog insheet_eurostat | |
qui des | |
if `r(k)'!=0 { | |
di as err "Memory is not empty!" | |
err 18 | |
} | |
tempfile temp_file | |
di as txt "Downloading `1'..." | |
if substr("`1'",1,5)=="https" { | |
copy "`1'" "`temp_file'" | |
} | |
else { | |
copy "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/`=strupper("`1'")'?format=TSV" "`temp_file'" | |
} | |
di "Importing into memory..." | |
insheet using "`temp_file'", names tab | |
qui tostring *, force replace | |
qui ds * | |
tokenize `r(varlist)' | |
loc first_col `1' | |
qui des | |
forval i = 2/`r(k)' { | |
qui ren ``i'' _`i' | |
} | |
loc first_col_names : var lab `first_col' | |
loc first_col_names = subinstr("`first_col_names'", ",", " ", .) | |
loc first_col_names = subinstr("`first_col_names'", "\TIME_PERIOD", "", 1) | |
loc time_pair_list "" | |
foreach vname of varlist _* { | |
loc time : var lab `vname' | |
loc name = subinstr("`vname'","_","",1) | |
loc time_pair_list `"`time_pair_list' `""`time'" if time=="`name'""'"' | |
} | |
di "Reshaping into long format (be patient, it is usually very slow)..." | |
qui reshape long _, i(`first_col') j(time) // I suggest replacing "reshape" with faster "greshape" -- it requires running first "ssc install gtools" | |
di "Reformatting time column..." | |
cap tostring time, replace | |
foreach pair of local time_pair_list { | |
qui replace time = `pair' | |
} | |
di "Splitting values and flags..." | |
qui split _ | |
drop _ | |
rename _1 value | |
rename _2 flags | |
qui destring value, replace force | |
cap destring time, replace // potentially -- will do it for years, will skip for e.g. quarters | |
qui split `first_col', parse(,) gen(_) | |
drop `first_col' | |
tokenize `first_col_names' | |
foreach n of varlist _* { | |
rename `n' `1' | |
macro shift | |
} | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment