Skip to content

Commit 36f4ace

Browse files
committed
Apache Arrow for table cells
1 parent 6ba05ed commit 36f4ace

File tree

8 files changed

+55
-13
lines changed

8 files changed

+55
-13
lines changed

.eslintrc.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"extends": "eslint:recommended",
33
"parserOptions": {
44
"sourceType": "module",
5-
"ecmaVersion": 2018
5+
"ecmaVersion": 2020
66
},
77
"env": {
88
"es6": true,

bin/resolve-dependencies

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,11 @@ const mains = ["unpkg", "jsdelivr", "browser", "main"];
6464
}
6565
{
6666
const package = await resolve("apache-arrow@4");
67-
console.log(`export const arrow = dependency("${package.name}", "${package.version}", "${package.export}");`);
67+
console.log(`export const arrow4 = dependency("${package.name}", "${package.version}", "${package.export}");`);
68+
}
69+
{
70+
const package = await resolve("apache-arrow@9");
71+
console.log(`export const arrow9 = dependency("${package.name}", "${package.version}", "+esm");`);
6872
}
6973
{
7074
const package = await resolve("arquero");

src/dependencies.mjs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ export const sql = dependency("sql.js", "1.7.0", "dist/sql-wasm.js");
1313
export const vega = dependency("vega", "5.22.1", "build/vega.min.js");
1414
export const vegalite = dependency("vega-lite", "5.5.0", "build/vega-lite.min.js");
1515
export const vegaliteApi = dependency("vega-lite-api", "5.0.0", "build/vega-lite-api.min.js");
16-
export const arrow = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js");
16+
export const arrow4 = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js");
17+
export const arrow9 = dependency("apache-arrow", "9.0.0", "+esm");
1718
export const arquero = dependency("arquero", "4.8.8", "dist/arquero.min.js");
1819
export const topojson = dependency("topojson-client", "3.1.0", "dist/topojson-client.min.js");
1920
export const exceljs = dependency("exceljs", "4.3.0", "dist/exceljs.min.js");

src/fileAttachment.mjs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import {autoType, csvParse, csvParseRows, tsvParse, tsvParseRows} from "d3-dsv";
2-
import {arrow, jszip, exceljs} from "./dependencies.mjs";
3-
import {requireDefault} from "./require.mjs";
2+
import {arrow4, arrow9, jszip, exceljs} from "./dependencies.mjs";
3+
import {cdn, requireDefault} from "./require.mjs";
44
import {SQLiteDatabaseClient} from "./sqlite.mjs";
55
import {Workbook} from "./xlsx.mjs";
66

@@ -56,9 +56,18 @@ export class AbstractFile {
5656
i.src = url;
5757
});
5858
}
59-
async arrow() {
60-
const [Arrow, response] = await Promise.all([requireDefault(arrow.resolve()), remote_fetch(this)]);
61-
return Arrow.Table.from(response);
59+
async arrow({version = 4} = {}) {
60+
switch (version) {
61+
case 4: {
62+
const [Arrow, response] = await Promise.all([requireDefault(arrow4.resolve()), remote_fetch(this)]);
63+
return Arrow.Table.from(response);
64+
}
65+
case 9: {
66+
const [Arrow, response] = await Promise.all([import(`${cdn}${arrow9.resolve()}`), remote_fetch(this)]);
67+
return Arrow.tableFromIPC(response);
68+
}
69+
default: throw new Error(`unsupported arrow version: ${version}`);
70+
}
6271
}
6372
async sqlite() {
6473
return SQLiteDatabaseClient.open(remote_fetch(this));

src/index.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
export {default as FileAttachments, AbstractFile} from "./fileAttachment.mjs";
22
export {default as Library} from "./library.mjs";
3-
export {makeQueryTemplate, loadDataSource, arrayIsPrimitive, isDataArray, isDatabaseClient} from "./table.mjs";
3+
export {makeQueryTemplate, loadDataSource, arrayIsPrimitive, isArrowTable, isDataArray, isDatabaseClient} from "./table.mjs";

src/library.mjs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import svg from "./svg.mjs";
1717
import tex from "./tex.mjs";
1818
import vegalite from "./vegalite.mjs";
1919
import width from "./width.mjs";
20-
import {arquero, arrow, d3, graphviz, htl, inputs, lodash, plot, topojson} from "./dependencies.mjs";
20+
import {arquero, arrow4, d3, graphviz, htl, inputs, lodash, plot, topojson} from "./dependencies.mjs";
2121
import {__query} from "./table.mjs";
2222

2323
export default Object.assign(Object.defineProperties(function Library(resolver) {
@@ -39,8 +39,8 @@ export default Object.assign(Object.defineProperties(function Library(resolver)
3939
// Recommended libraries
4040
// https://observablehq.com/@observablehq/recommended-libraries
4141
_: () => require(lodash.resolve()),
42-
aq: () => require.alias({"apache-arrow": arrow.resolve()})(arquero.resolve()),
43-
Arrow: () => require(arrow.resolve()),
42+
aq: () => require.alias({"apache-arrow": arrow4.resolve()})(arquero.resolve()),
43+
Arrow: () => require(arrow4.resolve()),
4444
d3: () => require(d3.resolve()),
4545
Inputs: () => require(inputs.resolve()).then(Inputs => ({...Inputs, file: Inputs.fileOf(AbstractFile)})),
4646
L: () => leaflet(require),

src/require.mjs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import {require as initialRequire, requireFrom} from "d3-require";
22

3+
// TODO Allow this to be overridden using the Library’s resolver.
4+
export const cdn = "https://cdn.observableusercontent.com/npm/";
5+
36
export let requireDefault = initialRequire;
47

58
export function setDefaultRequire(require) {

src/table.mjs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,20 @@ export function isDatabaseClient(value, mode) {
2323
);
2424
}
2525

26+
// Returns true if the value is an Apache Arrow table. This uses a “duck” test
27+
// (instead of strict instanceof) because we want it to work with a range of
28+
// Apache Arrow versions (7.0.0 or above).
29+
// https://arrow.apache.org/docs/7.0/js/classes/Arrow_dom.Table.html
30+
export function isArrowTable(value) {
31+
return (
32+
value &&
33+
typeof value.getChild === "function" &&
34+
typeof value.toArray === "function" &&
35+
value.schema &&
36+
Array.isArray(value.schema.fields)
37+
);
38+
}
39+
2640
// Returns true if the value is a typed array (for a single-column table), or if
2741
// it’s an array. In the latter case, the elements of the array must be
2842
// consistently typed: either plain objects or primitives or dates.
@@ -145,6 +159,7 @@ export const __query = Object.assign(
145159
source = await loadDataSource(await source, "table");
146160
if (isDatabaseClient(source)) return evaluateQuery(source, makeQueryTemplate(operations, source), invalidation);
147161
if (isDataArray(source)) return __table(source, operations);
162+
if (isArrowTable(source)) return __arrow(source, operations);
148163
if (!source) throw new Error("missing data source");
149164
throw new Error("invalid data source");
150165
},
@@ -164,6 +179,7 @@ export async function loadDataSource(source, mode) {
164179
case "text/csv": return source.csv({typed: true});
165180
case "text/tab-separated-values": return source.tsv({typed: true});
166181
case "application/json": return source.json();
182+
default: if (/\.arrow$/i.test(source.name)) return source.arrow({version: 9});
167183
}
168184
}
169185
if (mode === "table" || mode === "sql") {
@@ -390,8 +406,17 @@ function likeOperand(operand) {
390406
return {...operand, value: `%${operand.value}%`};
391407
}
392408

409+
// This function applies table cell operations to an in-memory Apache Arrow
410+
// table; it should be equivalent to the corresponding SQL query.
411+
function __arrow(source, operations) {
412+
operations;
413+
return source; // TODO
414+
}
415+
393416
// This function applies table cell operations to an in-memory table (array of
394-
// objects); it should be equivalent to the corresponding SQL query.
417+
// objects); it should be equivalent to the corresponding SQL query. TODO This
418+
// is only exported for testing, but we should be testing the public __query
419+
// instead of this internal method.
395420
export function __table(source, operations) {
396421
const input = source;
397422
let {schema, columns} = source;

0 commit comments

Comments
 (0)