From 5d857b0a7f46632a0497061df7afc2c25086c8e0 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 10 Apr 2024 13:44:51 -0400 Subject: [PATCH 1/3] Send WebAssembly binary over Jupyter WebSocket --- lonboard/_map.py | 14 ++++++++++++- package.json | 2 +- pyproject.toml | 8 +++++++- src/index.tsx | 18 +++++++++++++---- src/parquet.ts | 52 +++++++++++++++++++++++++++++++++++++++--------- 5 files changed, 78 insertions(+), 16 deletions(-) diff --git a/lonboard/_map.py b/lonboard/_map.py index 5b9287d2..b0518ecc 100644 --- a/lonboard/_map.py +++ b/lonboard/_map.py @@ -1,6 +1,7 @@ from __future__ import annotations import sys +from functools import lru_cache from pathlib import Path from typing import IO, TYPE_CHECKING, Optional, Sequence, TextIO, Union @@ -46,6 +47,13 @@ """ +@lru_cache +def _load_parquet_wasm_binary() -> bytes: + """Load the gzipped parquet-wasm binary blob""" + with open(bundler_output_dir / "arrow2_bg.wasm.gz", "rb") as f: + return f.read() + + class Map(BaseAnyWidget): """ The top-level class used to display a map in a Jupyter Widget. @@ -91,10 +99,14 @@ def __init__( if isinstance(layers, BaseLayer): layers = [layers] - super().__init__(layers=layers, **kwargs) + _parquet_wasm_content = _load_parquet_wasm_binary() + super().__init__( + layers=layers, _parquet_wasm_content=_parquet_wasm_content, **kwargs + ) _esm = bundler_output_dir / "index.js" _css = bundler_output_dir / "index.css" + _parquet_wasm_content = traitlets.Bytes(allow_none=False).tag(sync=True) view_state = ViewStateTrait() """ diff --git a/package.json b/package.json index dd44bf68..71e37059 100644 --- a/package.json +++ b/package.json @@ -31,7 +31,7 @@ "vitest": "^1.4.0" }, "scripts": { - "build": "node ./build.mjs", + "build": "node ./build.mjs && gzip -c node_modules/parquet-wasm/esm/arrow2_bg.wasm > lonboard/static/arrow2_bg.wasm.gz", "build:watch": "nodemon --watch src/ --exec \"npm run build\" --ext js,json,ts,tsx,css", "fmt:check": "prettier './src/**/*.{ts,tsx,css}' --check", "fmt": "prettier './src/**/*.{ts,tsx,css}' --write", diff --git a/pyproject.toml b/pyproject.toml index 54026782..72c53d1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,13 @@ authors = ["Kyle Barron "] license = "MIT" readme = "README.md" packages = [{ include = "lonboard" }] -include = ["lonboard/static/*.js", "lonboard/static/*.css", "MANIFEST.in"] +include = [ + "lonboard/static/*.js", + "lonboard/static/*.css", + "lonboard/static/*.wasm", + "lonboard/static/*.wasm.gz", + "MANIFEST.in", +] [tool.poetry.dependencies] python = "^3.8" diff --git a/src/index.tsx b/src/index.tsx index 3bb13703..929d7771 100644 --- a/src/index.tsx +++ b/src/index.tsx @@ -7,7 +7,7 @@ import DeckGL from "@deck.gl/react/typed"; import { MapViewState, type Layer } from "@deck.gl/core/typed"; import { BaseLayerModel, initializeLayer } from "./model/index.js"; import type { WidgetModel } from "@jupyter-widgets/base"; -import { initParquetWasm } from "./parquet.js"; +import { useParquetWasm } from "./parquet.js"; import { getTooltip } from "./tooltip/index.js"; import { isDefined, loadChildModels } from "./util.js"; import { v4 as uuidv4 } from "uuid"; @@ -15,8 +15,6 @@ import { Message } from "./types.js"; import { flyTo } from "./actions/fly-to.js"; import { useViewStateDebounced } from "./state"; -await initParquetWasm(); - const DEFAULT_INITIAL_VIEW_STATE = { latitude: 10, longitude: 0, @@ -65,6 +63,10 @@ async function getChildModelState( function App() { let model = useModel(); + let [parquetWasmBinary] = useModelState( + "_parquet_wasm_content", + ); + let [parquetWasmReady] = useParquetWasm(parquetWasmBinary); let [mapStyle] = useModelState("basemap_style"); let [mapHeight] = useModelState("_height"); let [showTooltip] = useModelState("show_tooltip"); @@ -108,7 +110,15 @@ function App() { let [stateCounter, setStateCounter] = useState(new Date()); useEffect(() => { + if (!parquetWasmReady) { + return; + } + const callback = async () => { + if (!parquetWasmReady) { + throw new Error("inside callback but parquetWasm not ready!"); + } + const childModels = await loadChildModels( model.widget_manager, childLayerIds, @@ -122,7 +132,7 @@ function App() { setSubModelState(newSubModelState); }; callback().catch(console.error); - }, [childLayerIds]); + }, [parquetWasmReady, childLayerIds]); const layers: Layer[] = []; for (const subModel of Object.values(subModelState)) { diff --git a/src/parquet.ts b/src/parquet.ts index 719a005d..7171d48e 100644 --- a/src/parquet.ts +++ b/src/parquet.ts @@ -1,20 +1,36 @@ import { useEffect, useState } from "react"; -import _initParquetWasm, { readParquet } from "parquet-wasm/esm/arrow2"; +import { initSync, readParquet } from "parquet-wasm/esm/arrow2"; import * as arrow from "apache-arrow"; -// NOTE: this version must be synced exactly with the parquet-wasm version in -// use. -const PARQUET_WASM_VERSION = "0.5.0"; -const PARQUET_WASM_CDN_URL = `https://cdn.jsdelivr.net/npm/parquet-wasm@${PARQUET_WASM_VERSION}/esm/arrow2_bg.wasm`; let WASM_READY: boolean = false; -export async function initParquetWasm() { - if (WASM_READY) { - return; +// https://developer.mozilla.org/en-US/docs/Web/API/Compression_Streams_API +async function decompressBlob(blob: Blob) { + const ds = new DecompressionStream("gzip"); + const decompressedStream = blob.stream().pipeThrough(ds); + return await new Response(decompressedStream).blob(); +} + +/** + * Initialize parquet-wasm from an existing WASM binary blob. + * It is expected that this WASM has been gzipped + * + * @return Whether initialization succeeded + */ +export async function initParquetWasmFromBinary( + view: DataView | null, +): Promise { + if (!view) { + return false; } - await _initParquetWasm(PARQUET_WASM_CDN_URL); + let blob = new Blob([view]); + const decompressedBlob = await decompressBlob(blob); + const decompressedBuffer = await decompressedBlob.arrayBuffer(); + + initSync(decompressedBuffer); WASM_READY = true; + return true; } /** @@ -58,3 +74,21 @@ export function parseParquetBuffers(dataViews: DataView[]): arrow.Table { return new arrow.Table(batches); } + +export function useParquetWasm(view: DataView | null): [boolean] { + const [wasmReady, setWasmReady] = useState(false); + + // Init parquet wasm + useEffect(() => { + const callback = async () => { + const succeeded = await initParquetWasmFromBinary(view); + if (succeeded) { + setWasmReady(true); + } + }; + + callback(); + }, []); + + return [wasmReady]; +} From e1c857909292ac892a79fde91702b394da201c93 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 10 Apr 2024 14:13:16 -0400 Subject: [PATCH 2/3] Refactor into initialize function --- src/index.tsx | 24 +++++++++--------------- src/parquet.ts | 28 +--------------------------- 2 files changed, 10 insertions(+), 42 deletions(-) diff --git a/src/index.tsx b/src/index.tsx index 929d7771..5da2a1b6 100644 --- a/src/index.tsx +++ b/src/index.tsx @@ -1,13 +1,13 @@ import * as React from "react"; import { useState, useEffect } from "react"; import { createRender, useModelState, useModel } from "@anywidget/react"; -import type { Initialize, Render } from "@anywidget/types"; +import type { Initialize, InitializeProps, Render } from "@anywidget/types"; import Map from "react-map-gl/maplibre"; import DeckGL from "@deck.gl/react/typed"; import { MapViewState, type Layer } from "@deck.gl/core/typed"; import { BaseLayerModel, initializeLayer } from "./model/index.js"; import type { WidgetModel } from "@jupyter-widgets/base"; -import { useParquetWasm } from "./parquet.js"; +import { initParquetWasmFromBinary } from "./parquet.js"; import { getTooltip } from "./tooltip/index.js"; import { isDefined, loadChildModels } from "./util.js"; import { v4 as uuidv4 } from "uuid"; @@ -63,10 +63,6 @@ async function getChildModelState( function App() { let model = useModel(); - let [parquetWasmBinary] = useModelState( - "_parquet_wasm_content", - ); - let [parquetWasmReady] = useParquetWasm(parquetWasmBinary); let [mapStyle] = useModelState("basemap_style"); let [mapHeight] = useModelState("_height"); let [showTooltip] = useModelState("show_tooltip"); @@ -110,15 +106,7 @@ function App() { let [stateCounter, setStateCounter] = useState(new Date()); useEffect(() => { - if (!parquetWasmReady) { - return; - } - const callback = async () => { - if (!parquetWasmReady) { - throw new Error("inside callback but parquetWasm not ready!"); - } - const childModels = await loadChildModels( model.widget_manager, childLayerIds, @@ -132,7 +120,7 @@ function App() { setSubModelState(newSubModelState); }; callback().catch(console.error); - }, [parquetWasmReady, childLayerIds]); + }, [childLayerIds]); const layers: Layer[] = []; for (const subModel of Object.values(subModelState)) { @@ -200,7 +188,13 @@ function App() { ); } +async function initialize({ model }: InitializeProps): Promise { + const parquetWasmBinary: DataView = model.get("_parquet_wasm_content"); + await initParquetWasmFromBinary(parquetWasmBinary); +} + const module: { render: Render; initialize?: Initialize } = { + initialize, render: createRender(App), }; diff --git a/src/parquet.ts b/src/parquet.ts index 7171d48e..36f349d2 100644 --- a/src/parquet.ts +++ b/src/parquet.ts @@ -1,4 +1,3 @@ -import { useEffect, useState } from "react"; import { initSync, readParquet } from "parquet-wasm/esm/arrow2"; import * as arrow from "apache-arrow"; @@ -17,20 +16,13 @@ async function decompressBlob(blob: Blob) { * * @return Whether initialization succeeded */ -export async function initParquetWasmFromBinary( - view: DataView | null, -): Promise { - if (!view) { - return false; - } - +export async function initParquetWasmFromBinary(view: DataView): Promise { let blob = new Blob([view]); const decompressedBlob = await decompressBlob(blob); const decompressedBuffer = await decompressedBlob.arrayBuffer(); initSync(decompressedBuffer); WASM_READY = true; - return true; } /** @@ -74,21 +66,3 @@ export function parseParquetBuffers(dataViews: DataView[]): arrow.Table { return new arrow.Table(batches); } - -export function useParquetWasm(view: DataView | null): [boolean] { - const [wasmReady, setWasmReady] = useState(false); - - // Init parquet wasm - useEffect(() => { - const callback = async () => { - const succeeded = await initParquetWasmFromBinary(view); - if (succeeded) { - setWasmReady(true); - } - }; - - callback(); - }, []); - - return [wasmReady]; -} From eae5d2e3d154689ba2f2f94f3100ec6ec97f1623 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 10 Apr 2024 14:27:54 -0400 Subject: [PATCH 3/3] Don't decode parquet wasm blob twice --- src/parquet.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parquet.ts b/src/parquet.ts index 36f349d2..39e218b7 100644 --- a/src/parquet.ts +++ b/src/parquet.ts @@ -17,6 +17,10 @@ async function decompressBlob(blob: Blob) { * @return Whether initialization succeeded */ export async function initParquetWasmFromBinary(view: DataView): Promise { + if (WASM_READY) { + return; + } + let blob = new Blob([view]); const decompressedBlob = await decompressBlob(blob); const decompressedBuffer = await decompressedBlob.arrayBuffer();