Running `node duckdb.cjs mydata.csv` creates the file `mydata.parquet`,
written with GZIP compression.
const duckdb = require('@duckdb/duckdb-wasm'); | |
const path = require('path'); | |
const fs = require("fs") | |
const Worker = require('web-worker'); | |
const DUCKDB_DIST = path.dirname(require.resolve('@duckdb/duckdb-wasm')); | |
// Usage: node duckdb.cjs <name>[.csv]
// Reads <name>.csv (relative paths are resolved against the current working
// directory), loads it into a DuckDB-WASM table and writes <name>.parquet
// next to it using GZIP compression.

/**
 * Removes a single trailing ".csv" extension from a file name.
 *
 * Only the end of the string is matched, so a name such as
 * "export.csv.2021.csv" keeps its inner ".csv" segment.
 *
 * @param {string} name - File name or path as given on the command line.
 * @returns {string} The name without its final ".csv" suffix.
 */
function stripCsvExtension(name) {
  return name.replace(/\.csv$/, "");
}

/**
 * Quotes a string for use as a SQL identifier: wraps it in double quotes and
 * doubles any embedded double quote.
 *
 * @param {string} name - Raw identifier (here: the table name).
 * @returns {string} The quoted identifier.
 */
function quoteIdentifier(name) {
  return `"${name.replace(/"/g, '""')}"`;
}

/**
 * Quotes a string for use as a SQL string literal: wraps it in single quotes
 * and doubles any embedded single quote.
 *
 * @param {string} text - Raw string (here: the output file path).
 * @returns {string} The quoted literal.
 */
function quoteLiteral(text) {
  return `'${text.replace(/'/g, "''")}'`;
}

/**
 * Loads `${baseName}.csv` into a DuckDB-WASM table named `baseName`, exports
 * the table as GZIP-compressed Parquet and writes it to `${baseName}.parquet`.
 *
 * The connection, database and worker are released in a `finally` block so
 * they are shut down on failure as well as on success.
 *
 * @param {string} baseName - CSV path without the ".csv" extension.
 * @returns {Promise<void>} Resolves once the Parquet file has been written.
 */
async function convertCsvToParquet(baseName) {
  const csvPath = `${baseName}.csv`;
  const parquetPath = `${baseName}.parquet`;
  // The table name comes straight from the command line, so it is quoted
  // before being placed into SQL text.
  const table = quoteIdentifier(baseName);

  const bundle = await duckdb.selectBundle({
    mvp: {
      mainModule: path.resolve(DUCKDB_DIST, './duckdb-mvp.wasm'),
      mainWorker: path.resolve(DUCKDB_DIST, './duckdb-node-mvp.worker.cjs'),
    },
    next: {
      mainModule: path.resolve(DUCKDB_DIST, './duckdb-next.wasm'),
      mainWorker: path.resolve(DUCKDB_DIST, './duckdb-node-next.worker.cjs'),
    },
  });
  const logger = new duckdb.ConsoleLogger();
  const worker = new Worker(bundle.mainWorker);
  const db = new duckdb.AsyncDuckDB(logger, worker);
  let conn = null;

  try {
    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
    conn = await db.connect();

    console.log("reading");
    const csvText = fs.readFileSync(csvPath, "utf8");
    await db.registerFileText(csvPath, csvText);
    await conn.insertCSVFromPath(csvPath, {
      schema: 'main',
      name: baseName,
    });

    console.log("inserted, querying");
    const preview = await conn.query(`SELECT * from ${table} LIMIT 1`);
    console.log(preview.toArray());

    await conn.query(
      `COPY (SELECT * FROM ${table}) TO ${quoteLiteral(parquetPath)} (FORMAT 'parquet', CODEC 'GZIP')`
    );
    const buffer = await db.copyFileToBuffer(parquetPath);
    console.log("buffer len", buffer.length);
    fs.writeFileSync(parquetPath, buffer);
    console.log("parquet file written");
  } finally {
    // Shut everything down even when a step above failed; otherwise the
    // still-running worker can keep the process from exiting.
    if (conn !== null) {
      await conn.close();
    }
    await db.terminate();
    await worker.terminate();
  }
  console.log("all done");
}

const csvArgument = process.argv[2];
if (csvArgument === undefined) {
  console.error("usage: node duckdb.cjs <file.csv>");
  process.exitCode = 1;
} else {
  const filename = stripCsvExtension(csvArgument);
  console.log("filename", filename);
  convertCsvToParquet(filename).catch((e) => {
    console.error(e);
    // Report the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  });
}
{
  "name": "bandcamp",
  "version": "0.0.1",
  "description": "",
  "main": "duckdb.cjs",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "@duckdb/duckdb-wasm": "1.14.3"
  }
}