Last active
August 5, 2022 16:42
-
-
Save brshallo/a90bcc258f436b428ba4c23cd56cae96 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
# Build an order-independent key from a set of values.
#
# All arguments are concatenated with c(), sorted, and pasted into a single
# string with "." between the elements, so c_sort_collapse("b", "a") and
# c_sort_collapse("a", "b") both give "a.b".
#
# Note: sort() is called with its defaults, which remove NA values.
#
# ...: values (or vectors of values) to combine into one key.
# Returns a length-one character vector.
c_sort_collapse <- function(...) {
  ordered_values <- sort(c(...))
  paste(ordered_values, collapse = ".")
}
# Unique order-independent combinations across parallel vectors (purrr version).
#
# purrr::pmap_chr() walks the inputs in parallel and calls c_sort_collapse()
# on the elements found at each position, producing one "."-delimited key per
# position. Duplicate keys are then removed with unique(), keeping the first
# occurrence of each.
#
# ...: vectors to combine position by position.
# Returns a character vector of unique keys.
unique_set <- function(...) {
  keys <- purrr::pmap_chr(list(...), c_sort_collapse)
  unique(keys)
}
# Reproducible benchmark inputs: two vectors of 1000 letters each, drawn with
# replacement. A separate seed is set before each draw.
set.seed(123)
x <- sample(letters, size = 1000, replace = TRUE)
set.seed(12)
y <- sample(letters, size = 1000, replace = TRUE)
# Unique order-independent combinations across parallel vectors (matrix version).
#
# The inputs are stacked as the rows of a matrix, so column i holds the i-th
# element of every input. Each column is sorted, duplicate columns are
# dropped (first occurrence kept), and every remaining column is pasted into
# a "."-delimited key.
#
# ...: vectors to combine position by position; all must have the same length.
# Returns a character vector of unique keys (character(0) when there is
# nothing to combine).
unique_set_matrix <- function(...) {
  inputs <- list(...)
  n_inputs <- length(inputs)
  if (n_inputs == 0L) {
    return(character(0))
  }

  # matrix(..., byrow = TRUE) would silently mis-pair elements if the inputs
  # had different lengths, so reject that case up front.
  n_sets <- unique(lengths(inputs))
  if (length(n_sets) != 1L) {
    stop("All inputs must have the same length.", call. = FALSE)
  }
  if (n_sets == 0L) {
    return(character(0))
  }

  # simplify = FALSE always yields a list with one sorted vector per column.
  # The default simplification collapses to a plain vector when only one
  # input is supplied, and cannot form a matrix when sort() drops NA values
  # from some columns.
  sorted_sets <- matrix(c(...), nrow = n_inputs, byrow = TRUE) |>
    apply(2, sort, simplify = FALSE)

  # vapply() guarantees a character vector regardless of input.
  vapply(unique(sorted_sets), paste, character(1), collapse = ".")
}
# Compare three ways of computing the unique order-independent pairs of the
# elements of x and y: the purrr helper, the matrix helper, and an inline
# base-R Map() pipeline. Each expression is evaluated 20 times.
microbenchmark::microbenchmark(
  tidy = unique_set(x, y),
  matrix = unique_set_matrix(x, y),
  # Lambda parameters are named a/b so they do not shadow the global x and y.
  Map = Map(\(a, b) paste0(sort(c(a, b)), collapse = "."), x, y) |>
    unique() |>
    unlist(),
  times = 20
)
#> Unit: milliseconds
#>    expr     min      lq     mean   median       uq     max neval
#>    tidy 23.0659 24.4679 30.04869 28.94710 33.89015 47.0418    20
#>  matrix 23.1267 25.5710 29.10305 28.10055 31.65875 46.9684    20
#>     Map 21.9148 26.1991 28.88400 27.78860 29.78030 46.8089    20
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See associated SO question: https://stackoverflow.com/questions/73243043/return-unique-combinations-of-items-r?noredirect=1#comment129367035_73243043 -- thus far, it seems the best improvement comes from replacing `sort()` with `stringi::stri_sort()`.