Skip to content

Instantly share code, notes, and snippets.

@vankesteren
Last active September 6, 2024 19:25
Show Gist options
  • Save vankesteren/ceac6f46a94d02c2cc7ad3fb2ca0da16 to your computer and use it in GitHub Desktop.
Save vankesteren/ceac6f46a94d02c2cc7ad3fb2ca0da16 to your computer and use it in GitHub Desktop.
Stop storing huge full-factorial grids, start using virtual grids. It behaves like a data frame but uses only a fraction of the memory!
virtual_grid <- function(...) {
pars <- list(...)
lens <- vapply(pars, length, 1L)
return(structure(list(pars = pars, lens = lens), class = c("vgrid", "tbl_lazy")))
}
dim.vgrid <- function(x) {
as.integer(c(prod(x$lens), length(x$lens)))
}
collect.vgrid <- function(x, ...) {
do.call(expand_grid, args = x$pars)
}
print.vgrid <- function(x, ...) {
cat("A virtual grid: ", nrow(x), "x", ncol(x), "\n\n")
if (nrow(x) < 7)
print(collect(x))
else {
cat("First three rows:\n")
print(head(x, 3))
cat("\nLast three rows:\n")
print(tail(x, 3), sum = "")
}
}
collect_vgrid_element <- function(x, i, j) {
if (i > nrow(x)) rlang::abort("Can't subset rows past the end!")
if (j > ncol(x)) rlang::abort("Can't subset columns past the end!")
# how many repetitions for each value?
reps <- if (j == ncol(x)) 1 else prod(x$lens[(j + 1):ncol(x)])
idx <- ceiling((i / reps) %% x$lens[j])
if (idx == 0) idx <- x$lens[j]
el <- x$pars[[j]][idx]
names(el) <- names(x$lens)[j]
el
}
collect_vgrid_row <- function(x, i) {
# if (i > nrow(x)) return(as_tibble(lapply(x$pars)))
row_list <- lapply(seq_along(x$pars), function(j) {
collect_vgrid_element(x, i, j)
})
names(row_list) <- names(x$lens)
return(structure(as_tibble(row_list), class = c("vgrid_slice", "tbl_df", "tbl", "data.frame")))
}
collect_vgrid_col <- function(x, j) {
col <- vapply(1:nrow(x), function(i) collect_vgrid_element(x, i, j), x$pars[[j]][1])
col <- as_tibble(col)
names(col) <- names(x$lens)[j]
return(col)
}
tbl_sum.vgrid_slice <- function(x, ...) {
paste("A virtual grid slice:", nrow(x), "x", ncol(x))
}
`[.vgrid` <- function(x, i, j, drop = FALSE, ...) {
i_arg <- substitute(i)
j_arg <- substitute(j)
if (missing(i)) {
i <- NULL
i_arg <- NULL
} else if (is.null(i)) {
i <- integer()
}
if (missing(j)) {
j <- NULL
j_arg <- NULL
} else if (is.null(j)) {
j <- integer()
}
# Ignore drop as an argument for counting
n_real_args <- nargs() - !missing(drop)
# Column or matrix subsetting if nargs() == 2L
if (n_real_args <= 2L) {
if (!missing(drop)) {
rlang::warn("`drop` argument ignored for subsetting a virtual grid with `x[j]`, it has an effect only for `x[i, j]`.")
drop <- FALSE
}
j <- i
i <- NULL
j_arg <- i_arg
i_arg <- NULL
}
if (is.null(j)) {
if (is.null(i) | length(i) < 1) {
rlang::warn("No indices found")
return()
}
if (length(i) == 1) {
return(collect_vgrid_row(x, i))
}
return(bind_rows(lapply(i, collect_vgrid_row, x = x)))
}
if (is.null(i)) {
# column collection
if (length(j == 1)) {
return(collect_vgrid_col(x, j))
}
return(bind_cols(lapply(j, collect_vgrid_col, x = x)))
}
# full slicing
if (length(i) == 1) {
if (length(j) == 1) {
val <- collect_vgrid_element(x, i, j)
val <- as_tibble(val)
names(val) <- names(x$lens[j])
return(val)
}
return(collect_vgrid_row(x, i)[,j])
}
if (length(i) > 1) {
return(bind_rows(lapply(i, collect_vgrid_row, x = x))[,j])
}
}
@vankesteren
Copy link
Author

source("https://gist.githubusercontent.com/vankesteren/ceac6f46a94d02c2cc7ad3fb2ca0da16/raw/4d05c2f58ef0dd5a04d4f8d246c29b88aa0301ee/virtualgrid.R")

# replacement for expand_grid!
vg <- virtual_grid(
  a = 1:5,
  b = letters[1:3],
  c = LETTERS[7:12]
)

# it behaves like a data frame but uses only a fraction of the memory!
print(vg)
dim(vg)
nrow(vg)
ncol(vg)
vg[1,]
vg[,2]
vg[3]
vg[43:47, c("a", "b")]
head(vg)
tail(vg)
collect(vg)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment