Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"extends": "eslint:recommended",
"parserOptions": {
"sourceType": "module",
"ecmaVersion": 2018
"ecmaVersion": 2020
},
"env": {
"es6": true,
Expand Down
6 changes: 5 additions & 1 deletion bin/resolve-dependencies
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ const mains = ["unpkg", "jsdelivr", "browser", "main"];
}
{
const package = await resolve("apache-arrow@4");
console.log(`export const arrow = dependency("${package.name}", "${package.version}", "${package.export}");`);
console.log(`export const arrow4 = dependency("${package.name}", "${package.version}", "${package.export}");`);
}
{
const package = await resolve("apache-arrow@9");
console.log(`export const arrow9 = dependency("${package.name}", "${package.version}", "+esm");`);
}
{
const package = await resolve("arquero");
Expand Down
3 changes: 2 additions & 1 deletion src/dependencies.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ export const sql = dependency("sql.js", "1.7.0", "dist/sql-wasm.js");
export const vega = dependency("vega", "5.22.1", "build/vega.min.js");
export const vegalite = dependency("vega-lite", "5.5.0", "build/vega-lite.min.js");
export const vegaliteApi = dependency("vega-lite-api", "5.0.0", "build/vega-lite-api.min.js");
export const arrow = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js");
export const arrow4 = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js");
export const arrow9 = dependency("apache-arrow", "9.0.0", "+esm");
export const arquero = dependency("arquero", "4.8.8", "dist/arquero.min.js");
export const topojson = dependency("topojson-client", "3.1.0", "dist/topojson-client.min.js");
export const exceljs = dependency("exceljs", "4.3.0", "dist/exceljs.min.js");
Expand Down
19 changes: 14 additions & 5 deletions src/fileAttachment.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import {autoType, csvParse, csvParseRows, tsvParse, tsvParseRows} from "d3-dsv";
import {arrow, jszip, exceljs} from "./dependencies.mjs";
import {requireDefault} from "./require.mjs";
import {arrow4, arrow9, jszip, exceljs} from "./dependencies.mjs";
import {cdn, requireDefault} from "./require.mjs";
import {SQLiteDatabaseClient} from "./sqlite.mjs";
import {Workbook} from "./xlsx.mjs";

Expand Down Expand Up @@ -56,9 +56,18 @@ export class AbstractFile {
i.src = url;
});
}
async arrow() {
const [Arrow, response] = await Promise.all([requireDefault(arrow.resolve()), remote_fetch(this)]);
return Arrow.Table.from(response);
async arrow({version = 4} = {}) {
switch (version) {
case 4: {
const [Arrow, response] = await Promise.all([requireDefault(arrow4.resolve()), remote_fetch(this)]);
return Arrow.Table.from(response);
}
case 9: {
const [Arrow, response] = await Promise.all([import(`${cdn}${arrow9.resolve()}`), remote_fetch(this)]);
return Arrow.tableFromIPC(response);
}
default: throw new Error(`unsupported arrow version: ${version}`);
}
}
async sqlite() {
return SQLiteDatabaseClient.open(remote_fetch(this));
Expand Down
2 changes: 1 addition & 1 deletion src/index.mjs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export {default as FileAttachments, AbstractFile} from "./fileAttachment.mjs";
export {default as Library} from "./library.mjs";
export {makeQueryTemplate, loadDataSource, arrayIsPrimitive, isDataArray, isDatabaseClient} from "./table.mjs";
export {makeQueryTemplate, loadDataSource, arrayIsPrimitive, isArrowTable, isDataArray, isDatabaseClient} from "./table.mjs";
6 changes: 3 additions & 3 deletions src/library.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import svg from "./svg.mjs";
import tex from "./tex.mjs";
import vegalite from "./vegalite.mjs";
import width from "./width.mjs";
import {arquero, arrow, d3, graphviz, htl, inputs, lodash, plot, topojson} from "./dependencies.mjs";
import {arquero, arrow4, d3, graphviz, htl, inputs, lodash, plot, topojson} from "./dependencies.mjs";
import {__query} from "./table.mjs";

export default Object.assign(Object.defineProperties(function Library(resolver) {
Expand All @@ -39,8 +39,8 @@ export default Object.assign(Object.defineProperties(function Library(resolver)
// Recommended libraries
// https://observablehq.com/@observablehq/recommended-libraries
_: () => require(lodash.resolve()),
aq: () => require.alias({"apache-arrow": arrow.resolve()})(arquero.resolve()),
Arrow: () => require(arrow.resolve()),
aq: () => require.alias({"apache-arrow": arrow4.resolve()})(arquero.resolve()),
Arrow: () => require(arrow4.resolve()),
d3: () => require(d3.resolve()),
Inputs: () => require(inputs.resolve()).then(Inputs => ({...Inputs, file: Inputs.fileOf(AbstractFile)})),
L: () => leaflet(require),
Expand Down
3 changes: 3 additions & 0 deletions src/require.mjs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import {require as initialRequire, requireFrom} from "d3-require";

// TODO Allow this to be overridden using the Library’s resolver.
export const cdn = "https://cdn.observableusercontent.com/npm/";

export let requireDefault = initialRequire;

export function setDefaultRequire(require) {
Expand Down
27 changes: 26 additions & 1 deletion src/table.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ export function isDatabaseClient(value, mode) {
);
}

// Returns true if the value is an Apache Arrow table, and false otherwise.
// This uses a “duck” test (instead of strict instanceof) because we want it to
// work with a range of Apache Arrow versions (7.0.0 and above): the value must
// expose getChild and toArray methods and a schema whose fields are an array.
// The result is always a strict boolean, so falsy inputs such as null, 0 or ""
// yield false rather than being returned as-is.
// https://arrow.apache.org/docs/7.0/js/classes/Arrow_dom.Table.html
export function isArrowTable(value) {
  return Boolean(
    value &&
    typeof value.getChild === "function" &&
    typeof value.toArray === "function" &&
    value.schema &&
    Array.isArray(value.schema.fields)
  );
}

// Returns true if the value is a typed array (for a single-column table), or if
// it’s an array. In the latter case, the elements of the array must be
// consistently typed: either plain objects or primitives or dates.
Expand Down Expand Up @@ -145,6 +159,7 @@ export const __query = Object.assign(
source = await loadDataSource(await source, "table");
if (isDatabaseClient(source)) return evaluateQuery(source, makeQueryTemplate(operations, source), invalidation);
if (isDataArray(source)) return __table(source, operations);
if (isArrowTable(source)) return __arrow(source, operations);
if (!source) throw new Error("missing data source");
throw new Error("invalid data source");
},
Expand All @@ -164,6 +179,7 @@ export async function loadDataSource(source, mode) {
case "text/csv": return source.csv({typed: true});
case "text/tab-separated-values": return source.tsv({typed: true});
case "application/json": return source.json();
default: if (/\.arrow$/i.test(source.name)) return source.arrow({version: 9});
}
}
if (mode === "table" || mode === "sql") {
Expand Down Expand Up @@ -390,8 +406,17 @@ function likeOperand(operand) {
return {...operand, value: `%${operand.value}%`};
}

// Applies table cell operations to an in-memory Apache Arrow table. The result
// is intended to match the corresponding SQL query, but the transformation is
// not implemented yet: the source table is currently returned unchanged.
function __arrow(source, operations) {
  // The operations are not applied yet; the bare reference has no effect at
  // runtime (presumably it only keeps the parameter from being reported as
  // unused).
  void operations;
  return source; // TODO
}
Comment on lines +409 to +414
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the Arrow DataFrame class and predicates removed in Apache Arrow 7.0.0 and later (apache/arrow#10371), I’m not really sure there’s a practical way to implement basic data transformations like filter and sort directly on Arrow Tables. ☹️ At least, it looks hard. Perhaps we need to instantiate a DuckDBClient #313 or Arquero Table here?


// This function applies table cell operations to an in-memory table (array of
// objects); it should be equivalent to the corresponding SQL query.
// objects); it should be equivalent to the corresponding SQL query. TODO This
// is only exported for testing, but we should be testing the public __query
// instead of this internal method.
export function __table(source, operations) {
const input = source;
let {schema, columns} = source;
Expand Down