Skip to content

Instantly share code, notes, and snippets.

@tjmahr
Created August 19, 2024 20:51
Show Gist options
  • Save tjmahr/4d485ec739117551a5ce7d0e70017e2b to your computer and use it in GitHub Desktop.
Save tjmahr/4d485ec739117551a5ce7d0e70017e2b to your computer and use it in GitHub Desktop.
struct_pack() in duckdb
# Make a csv to read in
db1 <- DBI::dbConnect(duckdb::duckdb())
f <- tempfile("mtcars", fileext = ".csv")
write.csv(mtcars, f)

library(tidyverse)

# Read in the csv.
# Here two column types are hard-coded using a struct
query <- sprintf("read_csv_auto('%s', types = {'mpg': 'DOUBLE', 'cyl': 'BIGINT'})", f)
tbl(db1, query)
#> # Source:   SQL [?? x 12]
#> # Database: DuckDB v1.0.0 [mahr@Windows 10 x64:R 4.4.0/:memory:]
#>    column00      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 Mazda RX4    21       6  160    110  3.9   2.62  16.5     0     1     4     4
#>  2 Mazda RX4 …  21       6  160    110  3.9   2.88  17.0     0     1     4     4
#>  3 Datsun 710   22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
#>  4 Hornet 4 D…  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
#>  5 Hornet Spo…  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
#>  6 Valiant      18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
#>  7 Duster 360   14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
#>  8 Merc 240D    24.4     4  147.    62  3.69  3.19  20       1     0     4     2
#>  9 Merc 230     22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
#> 10 Merc 280     19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
#> # ℹ more rows

# Bad SQL (column `fake` doesn't exist but we can't see the error message
# presumably because the {} are misread as string-interpolation delimiters by 
# cli)
query <- sprintf("read_csv_auto('%s', types = {'mpg': 'DOUBLE', 'fake': 'BIGINT'})", f)
tbl(db1, query)
#> Error in "fun(..., .envir = .envir)": ! Could not parse cli `{}` expression: `'mpg': 'DOUBLE', 'f…`.
#> Caused by error in `parse(text = code, keep.source = FALSE)`:
#> ! <text>:1:16: unexpected ','
#> 1: 'mpg': 'DOUBLE',
#>                    ^


# Workaround using struct_pack() to avoid the {} characters
query <- sprintf("read_csv_auto('%s', types = struct_pack(mpg := 'DOUBLE', cyl := 'BIGINT'))", f)
tbl(db1, query)
#> # Source:   SQL [?? x 12]
#> # Database: DuckDB v1.0.0 [mahr@Windows 10 x64:R 4.4.0/:memory:]
#>    column00      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 Mazda RX4    21       6  160    110  3.9   2.62  16.5     0     1     4     4
#>  2 Mazda RX4 …  21       6  160    110  3.9   2.88  17.0     0     1     4     4
#>  3 Datsun 710   22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
#>  4 Hornet 4 D…  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
#>  5 Hornet Spo…  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
#>  6 Valiant      18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
#>  7 Duster 360   14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
#>  8 Merc 240D    24.4     4  147.    62  3.69  3.19  20       1     0     4     2
#>  9 Merc 230     22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
#> 10 Merc 280     19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
#> # ℹ more rows

# Bad SQL (column `fake` doesn't exist and we can see the informative message)
query <- sprintf("read_csv_auto('%s', types = struct_pack(mpg := 'DOUBLE', fake := 'BIGINT'))", f)
tbl(db1, query)
#> Error in `db_query_fields.DBIConnection()`:
#> ! Can't query fields.
#> ℹ Using SQL: SELECT * FROM (FROM
#>   read_csv_auto('C:\Users\mahr\AppData\Local\Temp\3\RtmpkbJBU7\mtcars41fc7d9476f7.csv',
#>   types = struct_pack(mpg := 'DOUBLE', fake := 'BIGINT'))) q01 WHERE (0 = 1)
#> Caused by error:
#> ! rapi_prepare: Failed to prepare query SELECT *
#> FROM (FROM read_csv_auto('C:\Users\mahr\AppData\Local\Temp\3\RtmpkbJBU7\mtcars41fc7d9476f7.csv', types = struct_pack(mpg := 'DOUBLE', fake := 'BIGINT'))) q01
#> WHERE (0 = 1)
#> Error: Binder Error: COLUMN_TYPES error: Columns with names: "fake" do not exist in the CSV File

Created on 2024-08-19 with reprex v2.1.1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment