Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"extends": "eslint:recommended",
"parserOptions": {
"sourceType": "module",
"ecmaVersion": 2018
"ecmaVersion": 2020
},
"env": {
"es6": true,
Expand Down
10 changes: 9 additions & 1 deletion bin/resolve-dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ const mains = ["unpkg", "jsdelivr", "browser", "main"];
}
{
const package = await resolve("apache-arrow@4");
console.log(`export const arrow = dependency("${package.name}", "${package.version}", "${package.export}");`);
console.log(`export const arrow4 = dependency("${package.name}", "${package.version}", "${package.export}");`);
}
{
const package = await resolve("apache-arrow@9");
console.log(`export const arrow9 = dependency("${package.name}", "${package.version}", "+esm");`);
}
{
const package = await resolve("arquero");
Expand All @@ -86,6 +90,10 @@ const mains = ["unpkg", "jsdelivr", "browser", "main"];
const package = await resolve("leaflet");
console.log(`export const leaflet = dependency("${package.name}", "${package.version}", "${package.export.replace(/-src\.js$/, ".js")}");`);
}
{
const package = await resolve("@duckdb/duckdb-wasm");
console.log(`export const duckdb = dependency("${package.name}", "${package.version}", "+esm");`);
}
})();

async function resolve(specifier) {
Expand Down
1 change: 1 addition & 0 deletions rollup.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export default [
reserved: [
"FileAttachment",
"RequireError",
"DuckDBClient",
"SQLiteDatabaseClient",
"Workbook",
"ZipArchive",
Expand Down
58 changes: 58 additions & 0 deletions src/arrow.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Returns true if the vaue is an Apache Arrow table. This uses a “duck” test
// (instead of strict instanceof) because we want it to work with a range of
// Apache Arrow versions at least 7.0.0 or above.
// https://arrow.apache.org/docs/7.0/js/classes/Arrow_dom.Table.html
export function isArrowTable(value) {
return (
value &&
typeof value.getChild === "function" &&
typeof value.toArray === "function" &&
value.schema &&
Array.isArray(value.schema.fields)
);
}

export function getArrowTableSchema(table) {
return table.schema.fields.map(getArrowFieldSchema);
}

function getArrowFieldSchema(field) {
return {
name: field.name,
type: getArrowType(field.type),
nullable: field.nullable,
databaseType: String(field.type)
};
}

// https://github.com/apache/arrow/blob/89f9a0948961f6e94f1ef5e4f310b707d22a3c11/js/src/enum.ts#L140-L141
function getArrowType(type) {
switch (type.typeId) {
case 2: // Int
return "integer";
case 3: // Float
case 7: // Decimal
return "number";
case 4: // Binary
case 15: // FixedSizeBinary
return "buffer";
case 5: // Utf8
return "string";
case 6: // Bool
return "boolean";
case 8: // Date
case 9: // Time
case 10: // Timestamp
return "date";
case 12: // List
case 16: // FixedSizeList
return "array";
case 13: // Struct
case 14: // Union
return "object";
case 11: // Interval
case 17: // Map
default:
return "other";
}
}
4 changes: 3 additions & 1 deletion src/dependencies.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ export const sql = dependency("sql.js", "1.7.0", "dist/sql-wasm.js");
export const vega = dependency("vega", "5.22.1", "build/vega.min.js");
export const vegalite = dependency("vega-lite", "5.5.0", "build/vega-lite.min.js");
export const vegaliteApi = dependency("vega-lite-api", "5.0.0", "build/vega-lite-api.min.js");
export const arrow = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js");
export const arrow4 = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js");
export const arrow9 = dependency("apache-arrow", "9.0.0", "+esm");
export const arquero = dependency("arquero", "4.8.8", "dist/arquero.min.js");
export const topojson = dependency("topojson-client", "3.1.0", "dist/topojson-client.min.js");
export const exceljs = dependency("exceljs", "4.3.0", "dist/exceljs.min.js");
export const mermaid = dependency("mermaid", "9.1.6", "dist/mermaid.min.js");
export const leaflet = dependency("leaflet", "1.8.0", "dist/leaflet.js");
export const duckdb = dependency("@duckdb/duckdb-wasm", "1.17.0", "+esm");
272 changes: 272 additions & 0 deletions src/duckdb.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
import {getArrowTableSchema, isArrowTable} from "./arrow.mjs";
import {arrow9 as arrow, duckdb} from "./dependencies.mjs";
import {FileAttachment} from "./fileAttachment.mjs";
import {cdn} from "./require.mjs";

// Adapted from https://observablehq.com/@cmudig/duckdb-client
// Copyright 2021 CMU Data Interaction Group
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

export class DuckDBClient {
constructor(db) {
Object.defineProperties(this, {
_db: {value: db}
});
}

async queryStream(query, params) {
const connection = await this._db.connect();
let reader, batch;
try {
reader = await connection.send(query, params);
batch = await reader.next();
if (batch.done) throw new Error("missing first batch");
} catch (error) {
await connection.close();
throw error;
}
return {
schema: getArrowTableSchema(batch.value),
async *readRows() {
try {
while (!batch.done) {
yield batch.value.toArray();
batch = await reader.next();
}
} finally {
await connection.close();
}
}
};
}

async query(query, params) {
const result = await this.queryStream(query, params);
const results = [];
for await (const rows of result.readRows()) {
for (const row of rows) {
results.push(row);
}
}
results.schema = result.schema;
return results;
}

async queryRow(query, params) {
const result = await this.queryStream(query, params);
const reader = result.readRows();
try {
const {done, value} = await reader.next();
return done || !value.length ? null : value[0];
} finally {
await reader.return();
}
}

async sql(strings, ...args) {
return await this.query(strings.join("?"), args);
}

queryTag(strings, ...params) {
return [strings.join("?"), params];
}

escape(name) {
return `"${name}"`;
}

async describeTables() {
const tables = await this.query(`SHOW TABLES`);
return tables.map(({name}) => ({name}));
}

async describeColumns({table} = {}) {
const columns = await this.query(`DESCRIBE ${table}`);
return columns.map(({column_name, column_type, null: nullable}) => ({
name: column_name,
type: getDuckDBType(column_type),
nullable: nullable !== "NO",
databaseType: column_type
}));
}

static async of(sources = {}, config = {}) {
const db = await createDuckDB();
if (config.query?.castTimestampToDate === undefined) {
config = {...config, query: {...config.query, castTimestampToDate: true}};
}
await db.open(config);
await Promise.all(
Object.entries(sources).map(async ([name, source]) => {
if (source instanceof FileAttachment) { // bare file
await insertFile(db, name, source);
} else if (isArrowTable(source)) { // bare arrow table
await insertArrowTable(db, name, source);
} else if (Array.isArray(source)) { // bare array of objects
await insertArray(db, name, source);
} else if ("data" in source) { // data + options
const {data, ...options} = source;
if (isArrowTable(data)) {
await insertArrowTable(db, name, data, options);
} else {
await insertArray(db, name, data, options);
}
} else if ("file" in source) { // file + options
const {file, ...options} = source;
await insertFile(db, name, file, options);
} else {
throw new Error(`invalid source: ${source}`);
}
})
);
return new DuckDBClient(db);
}
}

async function insertFile(database, name, file, options) {
const url = await file.url();
if (url.startsWith("blob:")) {
const buffer = await file.arrayBuffer();
await database.registerFileBuffer(file.name, new Uint8Array(buffer));
} else {
await database.registerFileURL(file.name, url);
}
const connection = await database.connect();
try {
switch (file.mimeType) {
case "text/csv":
return await connection.insertCSVFromPath(file.name, {
name,
schema: "main",
...options
});
case "application/json":
return await connection.insertJSONFromPath(file.name, {
name,
schema: "main",
...options
});
default:
if (/\.arrow$/i.test(file.name)) {
const buffer = new Uint8Array(await file.arrayBuffer());
return await connection.insertArrowFromIPCStream(buffer, {
name,
schema: "main",
...options
});
}
if (/\.parquet$/i.test(file.name)) {
return await connection.query(
`CREATE VIEW '${name}' AS SELECT * FROM parquet_scan('${file.name}')`
);
}
throw new Error(`unknown file type: ${file.mimeType}`);
}
} finally {
await connection.close();
}
}

async function insertArrowTable(database, name, table, options) {
const arrow = await loadArrow();
const buffer = arrow.tableToIPC(table);
const connection = await database.connect();
try {
await connection.insertArrowFromIPCStream(buffer, {
name,
schema: "main",
...options
});
} finally {
await connection.close();
}
}

async function insertArray(database, name, array, options) {
const arrow = await loadArrow();
const table = arrow.tableFromJSON(array);
return await insertArrowTable(database, name, table, options);
}

async function createDuckDB() {
const duck = await import(`${cdn}${duckdb.resolve()}`);
const bundle = await duck.selectBundle({
mvp: {
mainModule: `${cdn}${duckdb.resolve("dist/duckdb-mvp.wasm")}`,
mainWorker: `${cdn}${duckdb.resolve("dist/duckdb-browser-mvp.worker.js")}`
},
eh: {
mainModule: `${cdn}${duckdb.resolve("dist/duckdb-eh.wasm")}`,
mainWorker: `${cdn}${duckdb.resolve("dist/duckdb-browser-eh.worker.js")}`
}
});
const logger = new duck.ConsoleLogger();
const worker = await duck.createWorker(bundle.mainWorker);
const db = new duck.AsyncDuckDB(logger, worker);
await db.instantiate(bundle.mainModule);
return db;
}

async function loadArrow() {
return await import(`${cdn}${arrow.resolve()}`);
}

// https://duckdb.org/docs/sql/data_types/overview
function getDuckDBType(type) {
switch (type) {
case "BIGINT":
case "HUGEINT":
case "UBIGINT":
return "bigint";
case "DOUBLE":
case "REAL":
return "number";
case "INTEGER":
case "SMALLINT":
case "TINYINT":
case "USMALLINT":
case "UINTEGER":
case "UTINYINT":
return "integer";
case "BOOLEAN":
return "boolean";
case "DATE":
case "TIMESTAMP":
case "TIMESTAMP WITH TIME ZONE":
return "date";
case "VARCHAR":
case "UUID":
return "string";
// case "BLOB":
// case "INTERVAL":
// case "TIME":
default:
if (/^DECIMAL\(/.test(type)) return "integer";
return "other";
}
}
Loading