Skip to content

Instantly share code, notes, and snippets.

@USMortality
Last active September 19, 2024 09:09
Show Gist options
  • Save USMortality/5495f4fee01b5f2cc5c5c9d901fc5120 to your computer and use it in GitHub Desktop.
Save USMortality/5495f4fee01b5f2cc5c5c9d901fc5120 to your computer and use it in GitHub Desktop.
Umfragen & Wahlergebnisse [Deutschland]
library(rvest)
library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)
library(purrr)
library(readr)
library(dplyr)
sf <- 2
options(vsc.dev.args = list(width = 600 * sf, height = 335 * sf, res = 72 * sf))
party_colors <- c(
"CDU/CSU" = "#000000",
"SPD" = "#E3000F",
"GRÜNE" = "#64BC5C",
"FDP" = "#FFCC00",
"DIE LINKE" = "#BE3075",
"AfD" = "#0C1C8C",
"FW" = "#f49800",
"BSW" = "#792350"
)
# Get historical data
ts1 <- read_csv("https://gist.githubusercontent.com/USMortality/a92fe8baacae09b8df03dd33aeea7d67/raw/data.csv")
# Get latet data
dates <- seq.Date(from = as.Date("2024-09-15"), to = Sys.Date(), by = "month")
process_table <- function(tbl) {
names(tbl) <- ifelse(names(tbl) == "", paste0("Unnamed", seq_along(tbl)), names(tbl))
tbl |>
select(1:(ncol(tbl))) |>
mutate(
across(
-1,
~ as.numeric(gsub(",", ".", gsub(" %", "", .))) / 100
)
)
}
result <- vector("list", length(dates))
for (i in seq_along(dates)) {
date <- dates[i]
print(date)
url <- paste0(
"https://web.archive.org/web/",
format(date, "%Y%m%d"),
"/https://www.wahlrecht.de/umfragen/"
)
# Attempt to read the webpage; if an error occurs, skip to the next iteration
tryCatch(
{
webpage <- read_html(url)
tables <- html_table(webpage, fill = TRUE)
df <- tables[[2]] %>%
process_table() %>%
pivot_longer(cols = 2:(last_col() - 1), names_to = "Variable", values_to = "Value") %>%
select(1, 3, 4) %>%
setNames(c("party", "institute", "value")) %>%
filter(!is.na(value))
df$date <- as.Date(dates[i])
result[[i]] <- df
},
error = function(e) {
message(sprintf("Error processing URL for date %s: %s", format(dates[i], "%Y-%m-%d"), e$message))
}
)
Sys.sleep(3)
}
# write.csv(ts, "~/Downloads/out.csv", row.names = FALSE)
ts <- bind_rows(ts1, compact(result)) |>
arrange(date, party) |>
unique()
# 12m span for LOESS
first_date <- min(ts$date)
last_date <- max(ts$date)
span <- min(365 / as.numeric(difftime(last_date, first_date, units = "days")), 1)
# Create the plot
ggplot(
ts |> filter(
party %in% names(party_colors),
# date >= as.Date("2017-09-24") - 30
),
aes(x = date, y = value, color = party, fill = party)
) +
geom_smooth(
method = "loess",
span = span,
# span = span * 3,
alpha = 0.2
) +
geom_point(size = 0.1) +
scale_y_continuous(labels = percent_format(scale = 100)) +
scale_color_manual(values = party_colors) +
scale_fill_manual(values = party_colors) +
geom_vline(
xintercept = as.Date(c("2013-09-22", "2017-09-24", "2021-09-26", "2025-09-28")),
linetype = "dashed", color = "black"
) +
labs(
x = "Datum",
y = "Stimmenanteil",
title = "Umfrageergebnisse [Deutschland]",
subtitle = "Gestrichelte Linien: Bundestagswahl · Quelle: wahlrecht.de · @USMortality",
color = "Partei",
fill = "Partei"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "right"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment