Last active
January 8, 2019 14:25
-
-
Save jankowtf/90c0f6bbcd8a843f337653a54347c37a to your computer and use it in GitHub Desktop.
TIL: defining custom dplyr methods
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
TIL how to define custom dplyr methods. As a bonus, I also got to work with the `vctrs` package a bit for the first time :-) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: 'TIL: how to create custom dplyr methods' | |
author: "Janko Thyson" | |
date: "2019-01-08" | |
output: html_document | |
--- | |
```{r setup, include=FALSE} | |
knitr::opts_chunk$set(echo = TRUE) | |
``` | |
TIL how to define custom dplyr methods. As a bonus, I also got to work with the `vctrs` package a bit for the first time :-) | |
A couple of things to highlight: | |
Resources I found very helpful: | |
- GitHub issue [#3429](https://github.com/tidyverse/dplyr/issues/3429) | |
- GitHub issue [#3923](https://github.com/tidyverse/dplyr/issues/3923) | |
- [Answer on SO](https://stackoverflow.com/questions/54083208/method-dispatch-for-functions-inside-dplyrdo/54084385#54084385) by [astrofunkswag](https://stackoverflow.com/users/5871218/astrofunkswag) | |
## Code | |
```{r} | |
library(dplyr) | |
#> | |
#> Attaching package: 'dplyr' | |
#> The following objects are masked from 'package:stats': | |
#> | |
#> filter, lag | |
#> The following objects are masked from 'package:base': | |
#> | |
#> intersect, setdiff, setequal, union | |
# Constructor for tbl_df_custom class ------------------------------------- | |
new_df_custom <- function(x = tibble()) { | |
stopifnot(tibble::is_tibble(x)) | |
structure(x, class = c("tbl_df_custom", class(x))) | |
} | |
# Example data ------------------------------------------------------------ | |
df_custom <- new_df_custom( | |
x = tibble::tibble( | |
id = c(rep("A", 3), rep("B", 3)), | |
x = 1:6 | |
) | |
) | |
df_custom | |
#> # A tibble: 6 x 2 | |
#> id x | |
#> * <chr> <int> | |
#> 1 A 1 | |
#> 2 A 2 | |
#> 3 A 3 | |
#> 4 B 4 | |
#> 5 B 5 | |
#> 6 B 6 | |
df_custom %>% class() | |
#> [1] "tbl_df_custom" "tbl_df" "tbl" "data.frame" | |
# Reclass function for preserving custom class attribute ------------------ | |
reclass <- function(x, to) { | |
UseMethod('reclass') | |
} | |
reclass.default <- function(x, to) { | |
class(x) <- unique(c(class(to)[[1]], class(x))) | |
attr(x, class(to)[[1]]) <- attr(to, class(to)[[1]]) | |
x | |
} | |
# Custom method for summarise --------------------------------------------- | |
summarise.tbl_df_custom <- function (.data, ...) { | |
message("Custom method for `summarise`") | |
vctrs::vec_restore(NextMethod(), .data) | |
} | |
# Custom method for group_by ---------------------------------------------- | |
group_by.tbl_df_custom <- function (.data, ..., add = FALSE, | |
use_vec_restore = FALSE | |
) { | |
message("Custom method for `group_by`") | |
retval <- reclass(NextMethod(), .data) | |
print(class(retval)) | |
retval | |
} | |
# Custom method for ungroup ---------------------------------------------- | |
ungroup.tbl_df_custom <- function (.data, ...) { | |
message("custom method for `ungroup`") | |
vctrs::vec_restore(NextMethod(), .data) | |
} | |
# Custom method for do ---------------------------------------------------- | |
do.tbl_df_custom <- function (.data, ...) { | |
message("custom method for `do`") | |
vctrs::vec_restore(NextMethod(), .data) | |
} | |
# Custom extraction method ------------------------------------------------ | |
`[.tbl_df_custom` <- function(x, ...) { | |
message("custom method for `[`") | |
new_df_custom(NextMethod()) | |
} | |
# Create custom methods for foo ------------------------------------------- | |
foo <- function(df) { | |
UseMethod("foo") | |
} | |
foo.default <- function(df) { | |
message("Default method for `foo`") | |
df %>% | |
summarise(y = mean(x)) | |
} | |
foo.tbl_df_custom <- function(df) { | |
message("Custom method for `foo`") | |
df %>% | |
summarise(y = mean(x) * 100) | |
} | |
# Testing things out ------------------------------------------------------ | |
retval <- df_custom %>% | |
group_by(id) %>% | |
do(foo(.)) | |
#> Custom method for `group_by` | |
#> [1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl" | |
#> [5] "data.frame" | |
#> custom method for `do` | |
#> custom method for `ungroup` | |
#> custom method for `[` | |
#> Custom method for `foo` | |
#> Custom method for `summarise` | |
#> custom method for `[` | |
#> Custom method for `foo` | |
#> Custom method for `summarise` | |
retval | |
#> custom method for `[` | |
#> custom method for `ungroup` | |
#> # A tibble: 2 x 2 | |
#> # Groups: id [2] | |
#> id y | |
#> <chr> <dbl> | |
#> 1 A 200 | |
#> 2 B 500 | |
retval %>% class() | |
#> [1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl" | |
#> [5] "data.frame" | |
``` | |
Created on 2019-01-08 by the [reprex package](https://reprex.tidyverse.org/) (v0.2.1) | |
## Alternative to `reclass()`: `vctrs::vec_restore()` | |
```{r} | |
# Alternative version for group_by that uses vctrs::vec_restore ----------- | |
group_by.tbl_df_custom <- function (.data, ..., add = FALSE) { | |
message("Custom method for `group_by`") | |
retval <- vctrs::vec_restore(NextMethod(), .data) | |
print(class(retval)) | |
retval | |
} | |
retval <- df_custom %>% | |
group_by(id) %>% | |
do(foo(.)) | |
#> Custom method for `group_by` | |
#> [1] "tbl_df_custom" "tbl_df" "tbl" "data.frame" | |
#> custom method for `do` | |
#> Custom method for `foo` | |
#> Custom method for `summarise` | |
retval | |
#> custom method for `[` | |
#> # A tibble: 1 x 1 | |
#> y | |
#> <dbl> | |
#> 1 350 | |
retval %>% class() | |
#> [1] "tbl_df_custom" "tbl_df" "tbl" "data.frame" | |
``` | |
Created on 2019-01-08 by the [reprex package](https://reprex.tidyverse.org/) (v0.2.1) | |
Note that when using the alternative version of `group_by()` that uses `vctrs::vec_restore()` instead of `reclass()`, the class attribute `grouped_df` is dropped. | |
## Alternative to `reclass()`: `vec_restore_inclusive()` | |
This is an own implementation that tries to leverage the way `vctrs::vec_restore()` works while also considering attributes of `to` in the decision of how the "reset" is carried out. Arguably, "combine" or "align" would be better name components for the function. | |
```{r} | |
vec_restore_inclusive <- function(x, to) { | |
UseMethod('vec_restore_inclusive') | |
} | |
vec_restore_inclusive.data.frame <- function (x, to) { | |
attr_to <- attributes(to) | |
attr_x <- attributes(x) | |
attr_use <- if ( | |
length(classes_preserve <- setdiff(attr_to[["class"]], attr_x[["class"]])) | |
) { | |
attr_x | |
} else { | |
attr_to | |
} | |
attr_use[["names"]] <- attr_x[["names"]] | |
attr_use[["row.names"]] <- .set_row_names(vctrs:::df_length(x)) | |
attr_use[["class"]] <- unique(c(classes_preserve, attr_x[["class"]])) | |
attributes(x) <- attr_use | |
x | |
} | |
group_by.tbl_df_custom <- function (.data, ..., add = FALSE) { | |
message("Custom method for `group_by`") | |
retval <- vec_restore_inclusive(NextMethod(), .data) | |
print(class(retval)) | |
retval | |
} | |
retval <- df_custom %>% | |
group_by(id) %>% | |
do(foo(.)) | |
#> Custom method for `group_by` | |
#> [1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl" | |
#> [5] "data.frame" | |
#> custom method for `do` | |
#> custom method for `ungroup` | |
#> custom method for `[` | |
#> Custom method for `foo` | |
#> Custom method for `summarise` | |
#> custom method for `[` | |
#> Custom method for `foo` | |
#> Custom method for `summarise` | |
retval | |
#> custom method for `[` | |
#> custom method for `ungroup` | |
#> # A tibble: 2 x 2 | |
#> # Groups: id [2] | |
#> id y | |
#> <chr> <dbl> | |
#> 1 A 200 | |
#> 2 B 500 | |
retval %>% class() | |
#> [1] "tbl_df_custom" "grouped_df" "tbl_df" "tbl" | |
#> [5] "data.frame" | |
``` | |
Created on 2019-01-08 by the [reprex package](https://reprex.tidyverse.org/) (v0.2.1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment