Created
July 10, 2023 07:44
-
-
Save mschnetzer/4f7f15d6f76542c75864871ee78394c1 to your computer and use it in GitHub Desktop.
Volksschüler:innen in Ganztagsschulen (https://twitter.com/matschnetzer/status/1678307172596195328?s=20)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach the packages used throughout this script
librarian::shelf(tidyverse, pdftools, geojsonsf, sf, ggtext, patchwork)

# Download the PDF, extract its text page by page and keep only page 3
rawpdf <- pdf_text("https://www.parlament.gv.at/dokument/XXVII/AB/14482/imfname_1572157.pdf")[3]
# Break the raw page text into single lines, strip leading whitespace,
# then keep only the three line ranges that make up the data area
lines <- str_trim(unlist(strsplit(rawpdf, "\n")), side = "left")
short <- lines[c(11:24, 30:36, 42:63)]
# Turn the selected text lines into a tidy table with one row per
# region and school type, holding pupil counts and percentage shares
df <- short |>
  as.data.frame() |>
  # Split every line into columns A..Z: cut at whitespace that is followed
  # by a digit, or at whitespace followed by the literal "na"
  separate(1, LETTERS, sep = "\\s+(?=[0-9])|\\s+na") |>
  # Remove the first "." of every cell (only the first occurrence is removed)
  map_df(\(col) str_remove(col, pattern = "\\.")) |>
  # Everything except the label column A becomes numeric
  mutate(across(-A, as.numeric)) |>
  # Rename A to `region` and discard columns that are NA throughout
  select(region = A, where(\(col) !all(is.na(col)))) |>
  drop_na() |>
  # Rows whose label contains "Gesamt" are dropped
  filter(!str_detect(region, "Gesamt")) |>
  # Strip everything up to and including the first run of two or more
  # whitespace characters from the region label
  mutate(region = str_remove(region, pattern = ".*?\\s+\\s+")) |>
  pivot_longer(-region, names_to = "original", values_to = "number") |>
  # Label the long rows by position: school type cycles fastest, the day
  # form changes every four rows. Both vectors have a hard-coded length of
  # 372 (4 * 93 and 12 * 31), so mutate() fails if the row count differs.
  # NOTE(review): presumably 31 regions x 12 value columns - confirm
  mutate(
    type = rep(c("VS", "MS", "SO", "AHS-U"), times = 93),
    day = rep(rep(c("Gesamt", "Halbtag", "Ganztag"), each = 4), times = 31)
  ) |>
  pivot_wider(
    id_cols = c(region, type),
    names_from = day,
    values_from = number
  ) |>
  # Shares of all-day (Ganztag) and half-day (Halbtag) pupils in percent
  mutate(
    gtshare = Ganztag / Gesamt * 100,
    htshare = Halbtag / Gesamt * 100
  )
# Load the district map of Austria and assign each political district
# (identified by its `iso` code) to its education region
bezmap <- geojson_sf("https://raw.githubusercontent.com/ginseng666/GeoJSON-TopoJSON-Austria/master/2021/simplified-99.9/bezirke_999_geo.json") |>
  mutate(
    region = case_when(
      # Vorarlberg
      iso %in% c(802, 803) ~ "Vorarlberg‐Region Nord",
      iso %in% c(804, 801) ~ "Vorarlberg‐Region Süd",
      # Tirol
      iso %in% c(702, 706, 708) ~ "Tirol‐Region West",
      iso %in% c(701, 703, 709) ~ "Tirol‐Region Mitte",
      iso %in% c(704, 705, 707) ~ "Tirol‐Region Ost",
      # Salzburg
      iso %in% c(501, 503) ~ "Salzburg‐Region Nord",
      iso %in% c(502, 504:506) ~ "Salzburg‐Region Süd",
      # Wien
      iso %in% c(902, 903, 910, 911, 920, 921, 922) ~ "Wien‐Region Ost",
      iso %in% c(901, 904:909, 912:919, 923) ~ "Wien‐Region West",
      # STMK
      iso %in% c(612) ~ "STMK‐Region Liezen",
      iso %in% c(611, 621) ~ "STMK‐Region Obersteiermark Ost",
      iso %in% c(614, 620) ~ "STMK‐Region Obersteiermark West",
      iso %in% c(617, 622) ~ "STMK‐Region Oststeiermark",
      iso %in% c(601, 606, 616) ~ "STMK‐Region Steirischer Zentralraum",
      iso %in% c(623) ~ "STMK‐Region Südoststeiermark",
      iso %in% c(603, 610) ~ "STMK‐Region Südweststeiermark",
      # OÖ
      iso %in% c(407, 417) ~ "OÖ‐Region Gmunden‐Vöcklabruck",
      iso %in% c(404, 412, 414) ~ "OÖ‐Region Innviertel",
      iso %in% c(401, 410) ~ "OÖ‐Region Linz/L",
      iso %in% c(406, 411, 413, 416) ~ "OÖ‐Region Mühlviertel",
      iso %in% c(402, 409, 415) ~ "OÖ‐Region Steyr‐Kirchdorf",
      iso %in% c(403, 405, 408, 418) ~ "OÖ‐Region Wels‐Grieskirchen‐Eferding",
      # NÖ
      iso %in% c(306, 307, 317) ~ "NÖ‐Region Baden",
      iso %in% c(308, 310, 312, 316) ~ "NÖ‐Region Mistelbach",
      iso %in% c(302, 314, 319, 321) ~ "NÖ‐Region Tulln",
      iso %in% c(303, 305, 315, 320) ~ "NÖ‐Region Waidhofen an der Ybbs",
      iso %in% c(304, 318, 323) ~ "NÖ‐Region Wiener Neustadt",
      iso %in% c(301, 309, 311, 313, 322, 325) ~ "NÖ‐Region Zwettl",
      # Kärnten
      iso %in% c(201, 204, 205, 208, 209) ~ "Kärnten‐Region Ost",
      iso %in% c(202, 203, 206, 207, 210) ~ "Kärnten‐Region West",
      # Burgenland
      iso %in% c(101:109) ~ "Burgenland‐Region"
    )
  ) |>
  # NOTE(review): pattern and replacement look identical here, which makes
  # this step a no-op; a distinct hyphen code point may have been intended
  # for one of them - confirm against the original file before changing it
  mutate(region = str_replace_all(region, "-", "-")) |>
  # Remove rows containing NA in any column (e.g. districts that matched
  # none of the conditions above and therefore have no region)
  drop_na()
# Evaluate primary schools (type "VS") only: attach their all-day share to
# every district, dissolve the districts into one geometry per education
# region and sort the share into 10-percentage-point classes.
vsmap <- bezmap |>
  # Join on `region` explicitly instead of relying on an implicit natural
  # join, so an accidental shared column can never change the join key
  left_join(
    df |> filter(type == "VS") |> select(region, gtshare),
    by = "region"
  ) |>
  # All districts of a region carry the same joined share, so the mean
  # simply collapses them to one value per region
  summarise(
    geometry = st_union(geometry),
    gtshare = mean(gtshare),
    .by = region
  ) |>
  # cut() uses right-closed intervals by default, i.e. (0,10], (10,20], ...
  mutate(
    anteil = cut(
      gtshare,
      breaks = seq(0, 100, 10),
      labels = glue::glue(">{seq(0,90,10)} bis {seq(10,100,10)}%")
    )
  )
# Build the federal-state outlines: the first digit of `iso` is used as the
# state key and all districts sharing it are unioned into one geometry
blmap <- bezmap |>
  mutate(bl = str_extract(iso, "^\\d")) |>
  summarise(geometry = st_union(geometry), .by = bl)
# Map of Austria with education regions filled by their share class
p1 <- ggplot(vsmap) +
  # Education regions with thin light-grey outlines
  geom_sf(
    aes(fill = anteil, group = region),
    linewidth = 0.1, color = "gray90"
  ) +
  # Federal-state borders drawn on top, without fill
  geom_sf(
    data = blmap,
    fill = "transparent", linewidth = 0.2, color = "black"
  ) +
  # Title, subtitle and source as one rich-text label, anchored at its
  # top-left corner (hjust = 0, vjust = 1) with no box fill or border
  annotate(
    "richtext",
    x = 9.52, y = 49.1,
    hjust = 0, vjust = 1,
    size = 4, lineheight = 1.1, family = "Roboto Condensed",
    fill = NA, label.colour = NA,
    label = "<span style='font-size:26px;font-family:\"Playfair Display\";'>Ganztägig betreut</span><br><br>Anteil der Volksschüler:innen<br>in <span style='color:#bf4f51;'>Ganztagesschulen</span> nach<br>Bildungsregionen, 2022/23<br><br><span style='font-size:10px;'>Quelle: Parlament · Grafik: @matschnetzer</span>"
  ) +
  # NOTE(review): index 3:9 assumes the palette returns at least 9 colours;
  # out-of-range positions would be NA - confirm against MetBrewer
  scale_fill_manual(
    values = MetBrewer::met.brewer("Tam", direction = -1)[3:9],
    guide = guide_legend(
      nrow = 1,
      label.position = "bottom",
      keywidth = 4,
      keyheight = 0.6
    )
  ) +
  coord_sf(ylim = c(46.3, 49)) +
  theme_minimal(base_family = "Roboto Condensed", base_size = 10) +
  # Bare map: no grid, no axes, untitled legend centred at the bottom
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    legend.title = element_blank(),
    legend.position = c(0.5, 0)
  )
# Shared builder for the two ranking charts (horizontal lollipops).
#
# regions      Rows of `vsmap` to draw; uses the columns `region`, `gtshare`
#              and `anteil`.
# palette_idx  Positions taken from the reversed "Tam" palette for the colour
#              scale. NOTE(review): they are chosen by hand to match the map
#              colours of the classes present in `regions` - confirm whenever
#              the data changes.
#
# Returns a ggplot object; callers add their own title / axis-text settings.
lollipop_chart <- function(regions, palette_idx) {
  regions |>
    # Order the bars by share so the ranking reads top to bottom
    mutate(region = fct_reorder(region, gtshare)) |>
    ggplot(aes(x = region, y = gtshare, color = anteil)) +
    geom_point(size = 2) +
    geom_segment(aes(xend = region, yend = 0), linewidth = 1.5) +
    # Label "<region>: <share>%" starting at the axis origin, nudged off
    # the stick; the "‐Region" part of the name is dropped from the label
    geom_text(
      aes(label = glue::glue("{str_remove(region, pattern = '‐Region')}: {round(gtshare,0)}%")),
      y = 0, hjust = 0, size = 2.3, nudge_x = 0.35,
      color = "black", family = "Roboto Condensed"
    ) +
    # Same fixed 0-60 range in both charts keeps them comparable
    scale_y_continuous(
      labels = scales::percent_format(scale = 1, suffix = "%"),
      limits = c(0, 60)
    ) +
    scale_color_manual(
      values = MetBrewer::met.brewer("Tam", direction = -1)[palette_idx]
    ) +
    coord_flip() +
    theme_minimal(base_family = "Roboto Condensed", base_size = 9) +
    theme(
      axis.title = element_blank(),
      legend.position = "none",
      panel.grid.major.y = element_blank(),
      panel.grid.major.x = element_line(linewidth = 0.2),
      panel.grid.minor = element_blank()
    )
}

# Top 5 education regions with the highest all-day share; carries the
# common title and hides all axis text
p2 <- lollipop_chart(slice_max(vsmap, gtshare, n = 5), palette_idx = 6:9) +
  labs(title = "5 höchste und niedrigste Quoten") +
  theme(
    axis.text = element_blank(),
    plot.title = element_text(size = 9)
  )

# 5 education regions with the lowest all-day share; only the region axis
# text is hidden, so the percentage axis stays visible on this chart
p3 <- lollipop_chart(slice_min(vsmap, gtshare, n = 5), palette_idx = 3:4) +
  theme(axis.text.y = element_blank())
# Combine the plots with patchwork: the map takes 2/3 of the width, the two
# ranking charts are stacked in the remaining 1/3
combined <- p1 + (p2 / p3) + plot_layout(widths = c(2/3, 1/3))
combined

# Pass the composed plot explicitly so the saved file does not depend on
# whichever plot happens to be registered as the last plot
ggsave("gtschule.png", plot = combined, bg = "white", width = 10, height = 4, dpi = 320)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment