I searched data.table for
functions that truncate dates.
round.IDate
for weeks (IDateTime.R#81
) seems inaccurate: the first week of the year has 6 days instead of 7 (fix: it should use yday(x) - 1L)
> rle(unclass(round(as.IDate(0:21), "week")))
Run Length Encoding
lengths: int [1:4] 6 7 7 2
values : int [1:4] 0 7 14 21
Add trunc.IDate
(possibly as a replacement for round.IDate
). For the "weeks" unit, add an option that selects where the week starts: year
, monday
, sunday
.
Possible implementation:
# Truncate dates to the start of their week.
#
# x:     a vector of dates (the "monday"/"sunday" modes convert the result
#        with as.IDate(); the "year" mode returns `x` shifted by whole days).
# start: where weeks begin:
#          "year"   - weeks are consecutive 7-day blocks counted from
#                     January 1st of each year;
#          "monday" - calendar weeks starting on Monday
#                     (cf. lubridate::floor_date(x, "week", week_start = 1));
#          "sunday" - calendar weeks starting on Sunday
#                     (cf. lubridate::floor_date(x, "week", week_start = 0)).
trunc_week <- function(x, start = c("year", "monday", "sunday")) {
  start = match.arg(start)

  if (start == "year") {
    # POSIXlt$yday is the zero-based day of the year.
    day_of_year = as.POSIXlt(x)$yday
    whole_weeks = (day_of_year %/% 7L) * 7L
    return(x - day_of_year + whole_weeks)
  }

  # Day number (days since 1970-01-01, a Thursday) of a reference week start:
  # 1970-01-05 (day 4) was a Monday, 1970-01-04 (day 3) was a Sunday.
  anchor = if (start == "monday") 4L else 3L
  as.IDate(7 * ((unclass(x) - anchor) %/% 7) + anchor)
}
Utility function to print bench results:
# Compact view of a bench::press()/bench::mark() result.
#
# x:   a bench result that as.data.table() can convert; it must contain the
#      columns listed in `cols` below (including the press parameter `n`).
# ...: unused; kept so the signature matches a print method.
#
# Returns a data.table with the selected columns, the benchmarked expression
# deparsed to text, and `relative` = median time divided by the fastest
# median within the same `n`.
print.bench = function(x, ...) {
  cols = c("expression", "n", "min", "median", "mem_alloc", "n_itr", "n_gc")
  r = as.data.table(x)[, ..cols]
  # deparse() may split a long expression over several strings; collapse them
  # so every row yields exactly one string and the column stays character.
  r[, expression := vapply(
    expression,
    function(e) paste(deparse(e), collapse = " "),
    character(1L)
  )]
  r[, relative := round(as.numeric(median / min(median)), 3), by = c("n")]
  r[]
}
Data to process.
# Fix the RNG state so the sample() calls in the benchmarks are reproducible.
set.seed(42)
# Every calendar day from 1900-01-01 to 2020-03-01, stored as IDate.
x = as.IDate(
  seq.Date(
    from = as.Date("1900-01-01"),
    to = as.Date("2020-03-01"),
    by = "day"
  )
)
# Week truncation in the style of IDateTime.R#81.
# The referenced line yields a 6-day first week of the year; this version
# already applies the proposed correction by using yday(x) - 1L.
trunc_week_old = function(x) {
  year_start = round(x, "year")
  # Zero-based index of the 7-day block within the year.
  week_index = (yday(x) - 1L) %/% 7L
  year_start + 7L * week_index
}
# Proposed week truncation: weeks are consecutive 7-day blocks counted from
# January 1st. Goes through as.POSIXlt() once and then uses plain arithmetic.
trunc_week_new = function(x) {
  # Zero-based day of the year.
  day_of_year = as.POSIXlt(x)$yday
  # Days from January 1st to the start of the containing 7-day block.
  block_start = (day_of_year %/% 7L) * 7L
  x - day_of_year + block_start
}
# Benchmark the two week-truncation implementations for each sample size n.
# NOTE(review): bench::mark() is believed to also verify by default that both
# expressions return equal results - confirm against the bench documentation.
r = bench::press(
n = c(100, 1000, 10000, 100000, 1000000),
{
# Draw n dates (with replacement) from the full date range in `x`.
d = sample(x, size = n, replace = TRUE)
bench::mark(trunc_week_old(d), trunc_week_new(d))
}
)
# Show the selected timing columns plus the per-n relative median.
print.bench(r)
#> expression n min median mem_alloc n_itr n_gc relative
#> <char> <num> <bench_time> <bench_time> <bench_bytes> <int> <num> <num>
#> 1: trunc_week_old(d) 1e+02 225.62µs 232.11µs 146.73KB 1950 2 7.050
#> 2: trunc_week_new(d) 1e+02 30.57µs 32.92µs 40.53KB 9996 4 1.000
#> 3: trunc_week_old(d) 1e+03 1.39ms 1.47ms 213KB 318 2 13.605
#> 4: trunc_week_new(d) 1e+03 106.56µs 108.29µs 59.2KB 4151 4 1.000
#> 5: trunc_week_old(d) 1e+04 13.44ms 13.88ms 2.06MB 33 1 15.994
#> 6: trunc_week_new(d) 1e+04 859.88µs 867.72µs 586.55KB 534 5 1.000
#> 7: trunc_week_old(d) 1e+05 134.9ms 137.59ms 20.6MB 2 2 15.515
#> 8: trunc_week_new(d) 1e+05 8.39ms 8.87ms 5.72MB 41 9 1.000
#> 9: trunc_week_old(d) 1e+06 1.45s 1.45s 206MB 1 6 15.516
#> 10: trunc_week_new(d) 1e+06 88.27ms 93.29ms 57.22MB 6 6 1.000
# Month truncation in the style of IDateTime.R#81: rebuild the date from its
# year and month with the day fixed to 1, then convert back to IDate.
trunc_month_old = function(x) {
  first_of_month = ISOdate(year(x), month(x), 1L)
  as.IDate(first_of_month)
}
# Proposed month truncation: step back to the first day of the month using
# the day-of-month field of POSIXlt (1-based).
trunc_month_new = function(x) {
  day_of_month = as.POSIXlt(x)$mday
  x - day_of_month + 1L
}
# Benchmark the two month-truncation implementations for each sample size n.
r = bench::press(
n = c(100, 1000, 10000, 100000, 1000000),
{
# Draw n dates (with replacement) from the full date range in `x`.
d = sample(x, size = n, replace = TRUE)
bench::mark(trunc_month_old(d), trunc_month_new(d))
}
)
# Show the selected timing columns plus the per-n relative median.
print.bench(r)
#> expression n min median mem_alloc n_itr n_gc relative
#> <char> <num> <bench_time> <bench_time> <bench_bytes> <int> <num> <num>
#> 1: trunc_month_old(d) 1e+02 198.49µs 219.99µs 27.5KB 1947 2 7.332
#> 2: trunc_month_new(d) 1e+02 28.26µs 30µs 6.03KB 9996 4 1.000
#> 3: trunc_month_old(d) 1e+03 1.47ms 1.5ms 216.91KB 329 1 15.449
#> 4: trunc_month_new(d) 1e+03 94.05µs 96.94µs 55.25KB 4683 3 1.000
#> 5: trunc_month_old(d) 1e+04 13.93ms 14.39ms 2.1MB 35 0 17.223
#> 6: trunc_month_new(d) 1e+04 794.89µs 835.49µs 547.44KB 571 4 1.000
#> 7: trunc_month_old(d) 1e+05 139.84ms 141.22ms 20.98MB 3 1 17.648
#> 8: trunc_month_new(d) 1e+05 7.93ms 8ms 5.34MB 58 4 1.000
#> 9: trunc_month_old(d) 1e+06 1.42s 1.42s 209.81MB 1 4 17.221
#> 10: trunc_month_new(d) 1e+06 80.22ms 82.72ms 53.41MB 6 7 1.000
# Quarter truncation in the style of IDateTime.R#82: rebuild the date from
# its year and the first month of its quarter, then convert back to IDate.
trunc_quarter_old = function(x) {
  # Quarter q (1..4) starts in month 3 * (q - 1) + 1, i.e. 1, 4, 7 or 10.
  first_month = 3L * (quarter(x) - 1L) + 1L
  as.IDate(ISOdate(year(x), first_month, 1L))
}
# Proposed quarter truncation: edit the POSIXlt fields in place so that the
# date points at day 1 of the quarter's first month, then convert to IDate.
trunc_quarter_new = function(x) {
  parts = as.POSIXlt(x)
  # POSIXlt months are zero-based (0..11); drop the offset within the
  # quarter to land on 0, 3, 6 or 9.
  parts$mon = parts$mon - parts$mon %% 3L
  parts$mday = 1L
  as.IDate(parts)
}
# Benchmark the two quarter-truncation implementations for each sample size n.
r = bench::press(
n = c(100, 1000, 10000, 100000, 1000000),
{
# Draw n dates (with replacement) from the full date range in `x`.
d = sample(x, size = n, replace = TRUE)
bench::mark(trunc_quarter_old(d), trunc_quarter_new(d))
}
)
# Show the selected timing columns plus the per-n relative median.
print.bench(r)
#> expression n min median mem_alloc n_itr n_gc relative
#> <char> <num> <bench_time> <bench_time> <bench_bytes> <int> <num> <num>
#> 1: trunc_quarter_old(d) 1e+02 199µs 217.63µs 28.12KB 2213 2 6.289
#> 2: trunc_quarter_new(d) 1e+02 32.26µs 34.61µs 18.62KB 9995 5 1.000
#> 3: trunc_quarter_old(d) 1e+03 1.55ms 1.64ms 216.91KB 294 1 8.413
#> 4: trunc_quarter_new(d) 1e+03 177.79µs 194.43µs 98.64KB 2308 3 1.000
#> 5: trunc_quarter_old(d) 1e+04 15.15ms 15.46ms 2.1MB 31 1 8.443
#> 6: trunc_quarter_new(d) 1e+04 1.73ms 1.83ms 977.55KB 258 4 1.000
#> 7: trunc_quarter_old(d) 1e+05 153.45ms 153.83ms 20.98MB 2 2 8.313
#> 8: trunc_quarter_new(d) 1e+05 18.02ms 18.5ms 9.54MB 19 6 1.000
#> 9: trunc_quarter_old(d) 1e+06 1.59s 1.59s 209.81MB 1 5 6.672
#> 10: trunc_quarter_new(d) 1e+06 187.06ms 238.51ms 95.37MB 3 6 1.000
# Year truncation in the style of IDateTime.R#83: rebuild January 1st of the
# date's year, then convert back to IDate.
trunc_year_old = function(x) {
  first_of_year = ISOdate(year(x), 1L, 1L)
  as.IDate(first_of_year)
}
# Proposed year truncation: subtract the zero-based day of the year so the
# result lands on January 1st.
trunc_year_new = function(x) {
  day_of_year = as.POSIXlt(x)$yday
  x - day_of_year
}
# Benchmark the two year-truncation implementations for each sample size n.
r = bench::press(
n = c(100, 1000, 10000, 100000, 1000000),
{
# Draw n dates (with replacement) from the full date range in `x`.
d = sample(x, size = n, replace = TRUE)
bench::mark(trunc_year_old(d), trunc_year_new(d))
}
)
# Show the selected timing columns plus the per-n relative median.
print.bench(r)
#> expression n min median mem_alloc n_itr n_gc relative
#> <char> <num> <bench_time> <bench_time> <bench_bytes> <int> <num> <num>
#> 1: trunc_year_old(d) 1e+02 178.84µs 184.22µs 17.56KB 2405 2 8.662
#> 2: trunc_year_new(d) 1e+02 19.81µs 21.27µs 5.59KB 9997 3 1.000
#> 3: trunc_year_old(d) 1e+03 1.26ms 1.3ms 161.7KB 375 0 14.864
#> 4: trunc_year_new(d) 1e+03 85.66µs 87.45µs 51.3KB 5431 3 1.000
#> 5: trunc_year_old(d) 1e+04 12.01ms 12.62ms 1.57MB 40 0 16.732
#> 6: trunc_year_new(d) 1e+04 743.28µs 754.02µs 508.33KB 578 3 1.000
#> 7: trunc_year_old(d) 1e+05 125.14ms 125.57ms 15.64MB 3 1 16.846
#> 8: trunc_year_new(d) 1e+05 7.38ms 7.45ms 4.96MB 64 3 1.000
#> 9: trunc_year_old(d) 1e+06 1.27s 1.27s 156.4MB 1 4 17.129
#> 10: trunc_year_new(d) 1e+06 73.92ms 74.31ms 49.59MB 7 3 1.000