Skip to content

Instantly share code, notes, and snippets.

@chgeuer
Created August 15, 2024 19:48
Show Gist options
  • Save chgeuer/aa8487b1fc786cd70007716c526fd168 to your computer and use it in GitHub Desktop.
Save chgeuer/aa8487b1fc786cd70007716c526fd168 to your computer and use it in GitHub Desktop.

Wetterdaten

Mix.install([
  {:req, "~> 0.5.6"},
  {:explorer, "~> 0.9.0"},
  {:kino_explorer, "~> 0.1.21"},
  {:iconv, "~> 1.0"},
  {:floki, "~> 0.36.2"}
])

Section

require Explorer.DataFrame, as: DF
require Explorer.Series, as: S

Schema: https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/DESCRIPTION_obsgermany-climate-daily-kl_en.pdf

| column name | description | uom | type | format |
|-------------|-------------|-----|------|--------|
| STATIONS_ID | Station ID | | VARCHAR2 | |
| MESS_DATUM | reference date | | | YYYYMMDD |
| QN_3 | quality level of the following columns | | | numerical code |
| FX | daily maximum of windgust | m/s | | 9990.0 |
| FM | daily mean of wind velocity | m/s | | 9990.0 |
| QN_4 | quality level of the following columns | | | numerical code |
| RSK | daily precipitation height | mm | | 9990.0 |
| RSKF | precipitation form | | | numerical code |
| SDK | daily sunshine duration | h | | 9990.0 |
| SHK_TAG | daily snow depth | cm | | 9990.0 |
| NM | daily mean of cloud cover | 1/8 | | 9990.0 |
| VPM | daily mean of vapor pressure | hPa | | 9990.0 |
| PM | daily mean of pressure | hPa | | 9990.0 |
| TMK | daily mean of temperature | °C | | 9990.0 |
| UPM | daily mean of relative humidity | % | | 9990.0 |
| TXK | daily maximum of temperature at 2m height | °C | | 9990.0 |
| TNK | daily minimum of temperature at 2m height | °C | | 9990.0 |
| TGK | daily minimum of air temperature at 5 cm above ground | °C | | 9990.0 |

defmodule DWDDate do
  @moduledoc """
  Converts compact `YYYYMMDD` dates into `Date` structs.
  """

  @doc """
  Parses a `YYYYMMDD` date given either as an integer or as an 8-byte string.

  Integers are rendered as decimal text first and then parsed like a string.
  Raises `ArgumentError` when the digits do not form a valid calendar date.
  """
  @spec to_date(integer() | String.t()) :: Date.t()
  def to_date(date) when is_integer(date) do
    date |> Integer.to_string() |> to_date()
  end

  def to_date(<<yyyy::binary-size(4), mm::binary-size(2), dd::binary-size(2)>>) do
    # Re-assemble the parts into the extended ISO 8601 form `YYYY-MM-DD`.
    Date.from_iso8601!("#{yyyy}-#{mm}-#{dd}")
  end
end

# Quick check of the date helper with an integer `YYYYMMDD` value.
DWDDate.to_date(20_231_216)

# Load the tab-separated station list, turn the compact `YYYYMMDD` validity
# dates into `Date` values and give the columns short English names.
df =
  "/mnt/c/chgeuer/stationen.csv"
  |> DF.from_csv!(
    header: true,
    delimiter: "\t",
    dtypes: [
      # {"Stations_id", :string},
      {"Bundesland", :category}
    ]
  )
  |> DF.transform(
    &%{
      von: DWDDate.to_date(&1["von_datum"]),
      bis: DWDDate.to_date(&1["bis_datum"])
    }
  )
  |> DF.discard(["von_datum", "bis_datum"])
  |> DF.rename(%{
    von: "start",
    bis: "end",
    Stationsname: "name",
    Stations_id: "id",
    Stationshoehe: "height",
    # NOTE: "Bundesland" is a German federal state; the existing "country"
    # column name is kept so that consumers of the parquet file keep working.
    Bundesland: "country",
    # "Breite" is the latitude and "Laenge" the longitude.
    geoBreite: "lat",
    geoLaenge: "lon"
  })

# Persist the cleaned-up table as parquet ...
DF.to_parquet!(df, "/mnt/c/chgeuer/stationen.parquet")

# ... and continue working with the parquet-backed copy.
df = DF.from_parquet!("/mnt/c/chgeuer/stationen.parquet")
defmodule DWDFiles do
  @moduledoc """
  Lists and downloads the daily climate observation ("KL") zip archives that
  the DWD open data server publishes per weather station.

  Uses `DWDDate.to_date/1` to turn the `YYYYMMDD` parts of file names into
  `Date` values.
  """

  # Downloads the HTML page at `url` and returns the `href` of every link that
  # points to a zip archive.
  defp links_from_doc(url) do
    %Req.Response{status: 200, body: html} = Req.get!(url: url)

    {:ok, floki_document} = Floki.parse_document(html)

    # Read the attribute instead of matching one exact node shape, so anchors
    # with additional attributes or nested markup do not crash the listing.
    floki_document
    |> Floki.attribute("a", "href")
    |> Enum.filter(&String.ends_with?(&1, ".zip"))
  end

  @doc """
  Lists the archives found in the `historical` directory.

  Each entry holds the station `:id`, the covered period (`:from`, `:to`) as
  taken from the file name, and the download `:url`. Zip files whose name does
  not follow the `tageswerte_KL_<id>_<from>_<to>_hist.zip` pattern are skipped.
  """
  @spec list_historic() :: [%{id: integer(), from: Date.t(), to: Date.t(), url: String.t()}]
  def list_historic() do
    directory =
      "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/historical/"

    directory
    |> links_from_doc()
    |> Enum.flat_map(fn file ->
      case Regex.named_captures(
             ~r/tageswerte_KL_(?<id>\d+)_(?<from>\d{8})_(?<to>\d{8})_hist\.zip/,
             file
           ) do
        %{"id" => id, "from" => from, "to" => to} ->
          [
            %{
              id: String.to_integer(id),
              from: DWDDate.to_date(from),
              to: DWDDate.to_date(to),
              url: directory <> file
            }
          ]

        nil ->
          []
      end
    end)
  end

  @doc """
  Lists the archives found in the `recent` directory.

  Each entry holds the station `:id` and the download `:url`. Zip files whose
  name does not follow the `tageswerte_KL_<id>_akt.zip` pattern are skipped.
  """
  @spec list_current() :: [%{id: integer(), url: String.t()}]
  def list_current() do
    directory =
      "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/"

    directory
    |> links_from_doc()
    |> Enum.flat_map(fn file ->
      case Regex.named_captures(~r/tageswerte_KL_(?<id>\d+)_akt\.zip/, file) do
        %{"id" => id} -> [%{id: String.to_integer(id), url: directory <> file}]
        nil -> []
      end
    end)
  end

  @doc """
  Downloads the archive at `url` and returns its observation file.

  The archive member whose name starts with `"produkt"` is selected and its
  content is converted from ISO-8859-15 to UTF-8. The result holds the station
  `:id` and the covered period (`:from`, `:to`) taken from the member's file
  name, plus the converted text as `:csv`.

  Raises `MatchError` when the archive has no such member or when its name does
  not follow the `produkt_klima_tag_<from>_<to>_<id>.txt` pattern.
  """
  @spec fetch_data(%{required(:url) => String.t(), optional(any()) => any()}) ::
          %{id: integer(), from: Date.t(), to: Date.t(), csv: binary()}
  def fetch_data(%{url: url}) do
    # The body is expected to be an enumerable of `{name, content}` archive
    # members (Req's body decoding unpacks zip responses); names may be
    # charlists, hence the `to_string/1`.
    %Req.Response{status: 200, body: entries} = Req.get!(url: url)

    {name, raw} =
      entries
      |> Enum.map(fn {name, content} -> {to_string(name), content} end)
      |> Enum.find(fn {name, _content} -> String.starts_with?(name, "produkt") end)

    %{"id" => id, "from" => from, "to" => to} =
      Regex.named_captures(
        ~r/produkt_klima_tag_(?<from>\d{8})_(?<to>\d{8})_(?<id>\d+)\.txt/,
        name
      )

    %{
      id: String.to_integer(id),
      from: DWDDate.to_date(from),
      to: DWDDate.to_date(to),
      # Only the selected member is transcoded; the other archive members are
      # never used.
      csv: :iconv.convert("iso8859-15", "utf-8", raw)
    }
  end
end
# "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/daily/kl/recent/"

# Pick station 1078 from the listing of current archives and download its
# observation file; only the CSV text is kept.
%{csv: current_csv} =
  DWDFiles.fetch_data(Enum.find(DWDFiles.list_current(), &(&1.id == 1078)))

# Parse the downloaded semicolon-separated text into a data frame, add a
# proper `date` column and give the measurement columns descriptive names.
current =
  current_csv
  |> DF.load_csv!(
    header: true,
    delimiter: ";",
    infer_schema_length: 10,
    # "-999" preceded by zero to ten spaces is read as nil
    # (presumably the marker for missing values in space-padded fields).
    nil_values: Enum.map(0..10, fn padding -> String.duplicate(" ", padding) <> "-999" end)
    # dtypes: [MESS_DATUM: :string]
  )
  # Strip surrounding whitespace from the column names.
  |> DF.rename_with(&String.trim/1)
  |> DF.transform(fn row ->
    %{date: DWDDate.to_date(row["MESS_DATUM"])}
  end)
  |> DF.discard(["eor", "QN_3", "QN_4"])
  |> DF.rename(%{
    "STATIONS_ID" => "Station ID",
    "FX" => "FX - daily maximum of windgust (m/s)",
    "FM" => "FM - daily mean of wind velocity (m/s)",
    "RSK" => "RSK - daily precipitation height mm",
    "RSKF" => "RSKF - precipitation form",
    "SDK" => "SDK - daily sunshine duration",
    "SHK_TAG" => "SHK_TAG - daily snow depth cm",
    "NM" => "NM - daily mean of cloud cover 1/8",
    "VPM" => "VPM - daily mean of vapor pressure hPa",
    "PM" => "PM - daily mean of pressure hPa",
    "TMK" => "TMK - daily mean of temperature °C",
    "UPM" => "UPM - daily mean of relative humidity %",
    "TXK" => "TXK - daily maximum of temperature at 2m height °C",
    "TNK" => "TNK - daily minimum of temperature at 2m height °C",
    "TGK" => "TGK - daily minimum of air temperature at 5 cm above ground °C"
  })
# Show the first observation row dated 2024-08-14 as an expandable tree.
current
|> DF.filter(date == Date.new!(2024, 08, 14))
|> DF.to_rows()
|> hd()
|> Kino.Tree.new()

# Load the "weather" records that Bright Sky returns for the same day at
# lat 51.296 / lon 6.7686 into a data frame.
[url: "https://api.brightsky.dev/weather?lat=51.296&lon=6.7686&date=2024-08-14"]
|> Req.get!()
|> Map.fetch!(:body)
|> Access.get("weather")
|> DF.new()

# Look up the station list entry with id 1078.
DF.filter(df, id == 1078)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment