library(tidyverse)
# fake some data
input_tbl <- lst(
MarketDay = as.Date(c("2016-01-01", "2016-01-02", "2016-01-03")),
HourEnding = 1:3,
FuelType = c("Coal", "Gas", "Hydro"),
Month = 1, # because only Jan dates
Region = "North"
) %>%
cross_df() %>%
mutate(
MarketDay = as.Date(MarketDay, origin = "1970-01-01"),
# skipping GenMISObyFuel, consumption_factor, withdrawal_factor
consumption = 10 * (1:n() + rnorm(n())), # changed this up to make later steps more clear
withdrawal = 20 * (1:n() + rnorm(n()))
)
input_tbl
#> # A tibble: 27 x 7
#> MarketDay HourEnding FuelType Month Region consumption withdrawal
#> <date> <int> <chr> <dbl> <chr> <dbl> <dbl>
#> 1 2016-01-01 1 Coal 1 North -11.3 23.8
#> 2 2016-01-02 1 Coal 1 North 9.01 16.9
#> 3 2016-01-03 1 Coal 1 North 42.1 47.2
#> 4 2016-01-01 2 Coal 1 North 33.0 94.1
#> 5 2016-01-02 2 Coal 1 North 43.0 88.3
#> 6 2016-01-03 2 Coal 1 North 55.4 137.
#> 7 2016-01-01 3 Coal 1 North 68.6 165.
#> 8 2016-01-02 3 Coal 1 North 82.0 153.
#> 9 2016-01-03 3 Coal 1 North 77.2 158.
#> 10 2016-01-01 1 Gas 1 North 84.7 171.
#> # … with 17 more rows
# we want total consumption & withdrawal per MarketDay & HourEnding.
# we'll do this as a separate tibble because it'll simplify the next step
summary_tbl <- input_tbl %>%
group_by(MarketDay, HourEnding) %>%
summarize(
hourly_total_consumption = sum(consumption),
hourly_total_withdrawal = sum(withdrawal)
) %>%
ungroup()
summary_tbl
#> # A tibble: 9 x 4
#> MarketDay HourEnding hourly_total_consumption hourly_total_withdrawal
#> <date> <int> <dbl> <dbl>
#> 1 2016-01-01 1 261. 549.
#> 2 2016-01-01 2 388. 753.
#> 3 2016-01-01 3 484. 941.
#> 4 2016-01-02 1 319. 680.
#> 5 2016-01-02 2 398. 877.
#> 6 2016-01-02 3 507. 987.
#> 7 2016-01-03 1 362. 696.
#> 8 2016-01-03 2 464. 947.
#> 9 2016-01-03 3 537. 1075.
# now we compute deltas by subtracting a lagged value from our value
# this would be trickier to implement and reason about if our hourly_total_* columns
# were already in our bigger input_tbl
summary_tbl <- summary_tbl %>%
arrange(MarketDay, HourEnding) %>% # to be extra sure our order is chronological
mutate(
delta_hourly_consumption = hourly_total_consumption - lag(hourly_total_consumption),
delta_hourly_withdrawal = hourly_total_withdrawal - lag(hourly_total_withdrawal)
)
# notice the NA's at the top here!
# there's nothing to subtract; the lag starts with NA, and x - NA = NA
summary_tbl
#> # A tibble: 9 x 6
#> MarketDay HourEnding hourly_total_co… hourly_total_wi… delta_hourly_co…
#> <date> <int> <dbl> <dbl> <dbl>
#> 1 2016-01-01 1 261. 549. NA
#> 2 2016-01-01 2 388. 753. 127.
#> 3 2016-01-01 3 484. 941. 96.4
#> 4 2016-01-02 1 319. 680. -165.
#> 5 2016-01-02 2 398. 877. 78.5
#> 6 2016-01-02 3 507. 987. 109.
#> 7 2016-01-03 1 362. 696. -144.
#> 8 2016-01-03 2 464. 947. 102.
#> 9 2016-01-03 3 537. 1075. 73.2
#> # … with 1 more variable: delta_hourly_withdrawal <dbl>
# now we can join this back in to our original table
tbl <- input_tbl %>%
left_join(summary_tbl, by = c("MarketDay", "HourEnding")) %>%
arrange(MarketDay, HourEnding)
tbl
#> # A tibble: 27 x 11
#> MarketDay HourEnding FuelType Month Region consumption withdrawal
#> <date> <int> <chr> <dbl> <chr> <dbl> <dbl>
#> 1 2016-01-01 1 Coal 1 North -11.3 23.8
#> 2 2016-01-01 1 Gas 1 North 84.7 171.
#> 3 2016-01-01 1 Hydro 1 North 188. 354.
#> 4 2016-01-01 2 Coal 1 North 33.0 94.1
#> 5 2016-01-01 2 Gas 1 North 122. 249.
#> 6 2016-01-01 2 Hydro 1 North 233. 410.
#> 7 2016-01-01 3 Coal 1 North 68.6 165.
#> 8 2016-01-01 3 Gas 1 North 161. 296.
#> 9 2016-01-01 3 Hydro 1 North 255. 481.
#> 10 2016-01-02 1 Coal 1 North 9.01 16.9
#> # … with 17 more rows, and 4 more variables:
#> # hourly_total_consumption <dbl>, hourly_total_withdrawal <dbl>,
#> # delta_hourly_consumption <dbl>, delta_hourly_withdrawal <dbl>
# too many columns to show in this format, so we'll focus on comsumption-only
tbl %>%
select(-Month, -Region, -FuelType, -contains("withdrawal"))
#> # A tibble: 27 x 5
#> MarketDay HourEnding consumption hourly_total_consu… delta_hourly_cons…
#> <date> <int> <dbl> <dbl> <dbl>
#> 1 2016-01-01 1 -11.3 261. NA
#> 2 2016-01-01 1 84.7 261. NA
#> 3 2016-01-01 1 188. 261. NA
#> 4 2016-01-01 2 33.0 388. 127.
#> 5 2016-01-01 2 122. 388. 127.
#> 6 2016-01-01 2 233. 388. 127.
#> 7 2016-01-01 3 68.6 484. 96.4
#> 8 2016-01-01 3 161. 484. 96.4
#> 9 2016-01-01 3 255. 484. 96.4
#> 10 2016-01-02 1 9.01 319. -165.
#> # … with 17 more rows
Created on 2019-10-30 by the reprex package (v0.3.0)