Skip to content

Instantly share code, notes, and snippets.

@iangow
Last active September 19, 2024 00:02
Show Gist options
  • Save iangow/ce559f93639d63bdcb05d8bb0afae9bc to your computer and use it in GitHub Desktop.
Save iangow/ce559f93639d63bdcb05d8bb0afae9bc to your computer and use it in GitHub Desktop.
Code to get a parquet file into PostgreSQL.
library(dplyr, warn.conflicts = FALSE)
library(DBI)
db <- dbConnect(duckdb::duckdb())
dbExecute(db, "ATTACH '' AS pg (TYPE POSTGRES);")
dbExecute(db, "COPY pg.crsp.dsf FROM '/Users/iangow/Library/CloudStorage/Dropbox/pq_data/crsp/dsf.parquet'")
create_view <- function(conn, table, schema = "",
data_dir = Sys.getenv("DATA_DIR")) {
dbExecute(conn, paste0("CREATE SCHEMA IF NOT EXISTS ", schema))
file_path <- file.path(data_dir, schema, paste0(table, ".parquet"))
df_sql <- paste0("CREATE VIEW ", schema, ".", table, " AS ",
"SELECT * FROM read_parquet('", file_path,
"')")
DBI::dbExecute(conn, dplyr::sql(df_sql))
}
db <- dbConnect(duckdb::duckdb())
crsp.dsi <- create_view(db, table = "dsi", schema = "crsp")
crsp.dsf <- create_view(db, table = "dsf", schema = "crsp")
dbGetQuery(db, "SELECT * FROM crsp.dsi LIMIT 10")
dbGetQuery(db, "SELECT * FROM crsp.dsf LIMIT 10")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment