-
-
Save artemklevtsov/6b39f44fa2745cf970770cc0b9445693 to your computer and use it in GitHub Desktop.
#' @title Floor Dates | |
#' @param x A vector of date. | |
#' @param unit A character string specifying a time unit. | |
#' @param start.on.monday Should the week start on Mondays or Sundays? | |
#' @return An object of class "Date". | |
floor_date <- function(x, unit = c("day", "week", "month", "quarter", "year"), start.on.monday = TRUE) { | |
stopifnot(is(x, "Date")) | |
unit <- match.arg(unit) | |
if (unit == "day") { | |
return(x) | |
} | |
if (unit == "week") { | |
l <- ((unclass(ll) - 3L) %/% 7L) * 7L + 4L | |
if (start.on.monday) { | |
return(.Date(7 * ((unclass(x) - 4L) %/% 7) + 4L)) | |
} else { | |
return(.Date(7 * ((unclass(x) - 3L) %/% 7) + 3L)) | |
} | |
} else { | |
l <- as.POSIXlt(x) | |
l <- switch( | |
unit, | |
month = l$mday, | |
quarter = l$mon %/% 3L, | |
year = l$year | |
) | |
} | |
return(x - l + 1L) | |
} |
artemklevtsov
commented
Mar 16, 2020
for weeks, can't we just do .Date(7*(unclass(x) %/% 7))?
It provides a different results:
> head(.Date(7 * (unclass(ll) %/% 7)), 20)
[1] "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-01" "1970-01-08"
[9] "1970-01-08" "1970-01-08" "1970-01-08" "1970-01-08" "1970-01-08" "1970-01-08" "1970-01-15" "1970-01-15"
[17] "1970-01-15" "1970-01-15" "1970-01-15" "1970-01-15"
> head(floor_date(ll, "week"), 20)
[1] "1969-12-29" "1969-12-29" "1969-12-29" "1969-12-29" "1970-01-05" "1970-01-05" "1970-01-05" "1970-01-05"
[9] "1970-01-05" "1970-01-05" "1970-01-05" "1970-01-12" "1970-01-12" "1970-01-12" "1970-01-12" "1970-01-12"
[17] "1970-01-12" "1970-01-12" "1970-01-19" "1970-01-19"
sorry I was being imprecise... since Jan 1 1970 is I think a Wednesday you'd need to include an offset after unclass
That's right, but:
> head((unclass(ll) - 3L) %/% 7, 20)
[1] -1 -1 -1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 2 2
> head(unclass(ll) - as.POSIXlt(ll)$wday, 20)
[1] -4 -4 -4 3 3 3 3 3 3 3 10 10 10 10 10 10 10 17 17 17
How can we get it?
Found:
> head(unclass(ll) - as.POSIXlt(ll)$wday, 20)
[1] -4 -4 -4 3 3 3 3 3 3 3 10 10 10 10 10 10 10 17 17 17
> head(((unclass(ll) - 3L) %/% 7L) * 7L + 3L, 20)
[1] -4 -4 -4 3 3 3 3 3 3 3 10 10 10 10 10 10 10 17 17 17
yea depends on trunc to monday or sunday, I see
all.equal(
lubridate::floor_date(ll, "week", 1),
.Date(7*((unclass(ll) - 4L) %/% 7) + 4L)
)
all.equal(
lubridate::floor_date(ll, "week"),
.Date(7*((unclass(ll) - 3L) %/% 7) + 3L)
)
There should be a way to get that number and make your floor_date
about to match lubridate::floor_date
completely 👍
I see this on the benchmark:
library(lubridate)
st <- as.Date("1970-01-01")
en <- as.Date("2020-03-15")
ll <- seq.Date(st, en, by = "day")
base_floor_date <- function(x, unit = c("day", "week", "month", "quarter", "year"), start.on.monday = TRUE) {
stopifnot(is(x, "Date"))
unit <- match.arg(unit)
if (unit == "day") {
return(x)
}
if (unit == 'week') {
offset = 3L + start.on.monday
return(.Date(7L*((unclass(x) - offset) %/% 7L) + offset))
}
l <- as.POSIXlt(x)
l <- switch(
unit,
week = l$wday,
month = l$mday,
quarter = l$mon %/% 3L,
year = l$year
)
if (unit == "week" && start.on.monday) {
l <- replace(l, l == 0L, 7L)
}
return(x - l + 1L)
}
bench::mark(
floor_date(ll, "week", 1),
as.Date(cut.Date(ll, "week")),
base_floor_date(ll, "week")
)
# A tibble: 3 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int>
1 floor_date(ll, "week", 1) 4.64ms 5.12ms 181. 2.45MB 9.17 79
2 as.Date(cut.Date(ll, "week")) 63.35ms 67.93ms 12.3 5.52MB 2.04 6
3 base_floor_date(ll, "week") 281.35µs 325.04µs 2867. 280.7KB 6.60 1304
# … with 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
# memory <list>, time <list>, gc <list>
So, now it's perfect.
> bench::mark(
+ lubridate::floor_date(ll, "week", 1),
+ as.Date(cut.Date(ll, "week")),
+ floor_date(ll, "week", TRUE),
+ floor_date_(ll, "week", TRUE)
+ )
# A tibble: 4 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list>
1 lubridate::floor_date(ll, "week", 1) 4.51ms 4.65ms 213. NA 10.8 99 5 464ms <date…
2 as.Date(cut.Date(ll, "week")) 46.58ms 47.04ms 21.3 NA 2.13 10 1 470ms <date…
3 floor_date(ll, "week", TRUE) 1.27ms 1.3ms 756. NA 21.7 349 10 462ms <date…
4 floor_date_(ll, "week", TRUE) 772.06µs 783.96µs 1195. NA 4.53 528 2 442ms <date…
# … with 3 more variables: memory <list>, time <list>, gc <list>
> bench::mark(
+ lubridate::floor_date(ll, "week", 7),
+ floor_date(ll, "week", FALSE),
+ floor_date_(ll, "week", FALSE)
+ )
# A tibble: 3 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list>
1 lubridate::floor_date(ll, "week", 7) 4.5ms 4.9ms 201. NA 10.9 92 5 457ms <date…
2 floor_date(ll, "week", FALSE) 1.24ms 1.29ms 717. NA 17.1 336 8 469ms <date…
3 floor_date_(ll, "week", FALSE) 771.52µs 787.64µs 1168. NA 6.87 510 3 437ms <date…
# … with 3 more variables: memory <list>, time <list>, gc <list>
@MichaelChirico thank you for the notes. I hope we will look something like that in data.table
.
🚀 i wonder lubridate
would accept a PR... I don't see mem_alloc
on your benchmark, but on mine it's 5x faster & 10x less memory... sacrifice is slight readability issue on internal code? can be solved with comments ideally
Strange to me there's no trunc.Date('week')
method in base
. Anyway, we might consider supporting that in trunc.IDate
if you could please file PR 🙏
lubridate
wants stringr
...
Anyway, we might consider supporting that in trunc.IDate if you could please file PR pray
Good idea. I'll do it.