diff --git a/arbalister/arrow.py b/arbalister/arrow.py index 365631e..3157b36 100644 --- a/arbalister/arrow.py +++ b/arbalister/arrow.py @@ -2,7 +2,7 @@ import pathlib from typing import Any, Callable, Self -import datafusion as dtfn +import datafusion as dn import pyarrow as pa @@ -35,7 +35,7 @@ def from_filename(cls, file: pathlib.Path | str) -> Self: raise ValueError(f"Unknown file type {file_type}") -ReadCallable = Callable[..., dtfn.DataFrame] +ReadCallable = Callable[..., dn.DataFrame] def _arrow_to_avro_type(field: pa.Field) -> str | dict[str, Any]: @@ -93,17 +93,17 @@ def get_table_reader(format: FileFormat) -> ReadCallable: out: ReadCallable match format: case FileFormat.Avro: - out = dtfn.SessionContext.read_avro + out = dn.SessionContext.read_avro case FileFormat.Csv: - out = dtfn.SessionContext.read_csv + out = dn.SessionContext.read_csv case FileFormat.Parquet: - out = dtfn.SessionContext.read_parquet + out = dn.SessionContext.read_parquet case FileFormat.Ipc: import pyarrow.feather def read_ipc( - ctx: dtfn.SessionContext, path: str | pathlib.Path, **kwargs: dict[str, Any] - ) -> dtfn.DataFrame: + ctx: dn.SessionContext, path: str | pathlib.Path, **kwargs: dict[str, Any] + ) -> dn.DataFrame: # table = pyarrow.feather.read_table(path, {**{"memory_map": True}, **kwargs}) table = pyarrow.feather.read_table(path, **kwargs) return ctx.from_arrow(table) @@ -115,8 +115,8 @@ def read_ipc( import pyarrow.orc def read_orc( - ctx: dtfn.SessionContext, path: str | pathlib.Path, **kwargs: dict[str, Any] - ) -> dtfn.DataFrame: + ctx: dn.SessionContext, path: str | pathlib.Path, **kwargs: dict[str, Any] + ) -> dn.DataFrame: table = pyarrow.orc.read_table(path, **kwargs) return ctx.from_arrow(table) diff --git a/arbalister/routes.py b/arbalister/routes.py index e3b6ddc..4d12ddf 100644 --- a/arbalister/routes.py +++ b/arbalister/routes.py @@ -2,8 +2,8 @@ import os import pathlib -import datafusion as dtfn -import datafusion.functions as fn +import datafusion as dn +import datafusion.functions as dnf import jupyter_server.base.handlers import jupyter_server.serverapp import pyarrow as pa @@ -17,24 +17,17 @@ class BaseRouteHandler(jupyter_server.base.handlers.APIHandler): """A base handler to share common methods.""" - def initialize(self, context: dtfn.SessionContext) -> None: + def initialize(self, context: dn.SessionContext) -> None: """Process custom constructor arguments.""" super().initialize() self.context = context - def make(self) -> dtfn.SessionConfig: - """Return the datafusion config.""" - config = dtfn.SessionConfig() - # String views do not get written properly to IPC - config.set("datafusion.execution.parquet.schema_force_view_types", "false") - return config - def data_file(self, path: str) -> pathlib.Path: """Return the file that is requested by the URL path.""" root_dir = pathlib.Path(os.path.expanduser(self.settings["server_root_dir"])).resolve() return root_dir / path - def dataframe(self, path: str) -> dtfn.DataFrame: + def dataframe(self, path: str) -> dn.DataFrame: """Return the DataFusion lazy DataFrame. Note: On some file type, the file is read eagerly when calling this method. 
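For orientation between the hunks above and below, here is a minimal usage sketch of the reader dispatch that arbalister/arrow.py exposes after the dtfn-to-dn alias rename. It relies only on the FileFormat and get_table_reader APIs visible in this diff; the data file path is invented for illustration.

import datafusion as dn

import arbalister.arrow as aa

# Resolve the reader from the file extension, then call it with a SessionContext.
# Parquet, CSV and Avro dispatch to the native DataFusion readers; IPC and ORC are
# read through pyarrow and handed to DataFusion via ctx.from_arrow, as in the hunks above.
ctx = dn.SessionContext()
fmt = aa.FileFormat.from_filename("data/example.parquet")  # hypothetical file
reader = aa.get_table_reader(fmt)
df = reader(ctx, "data/example.parquet")  # lazy datafusion.DataFrame
print(df.schema())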
@@ -52,8 +45,10 @@ def get_query_params_as[T](self, dataclass_type: type[T]) -> T: class IpcParams: """Query parameter for IPC data.""" - per_chunk: int | None = None - chunk: int | None = None + row_chunk_size: int | None = None + row_chunk: int | None = None + col_chunk_size: int | None = None + col_chunk: int | None = None class IpcRouteHandler(BaseRouteHandler): @@ -66,11 +61,17 @@ async def get(self, path: str) -> None: self.set_header("Content-Type", "application/vnd.apache.arrow.stream") - df: dtfn.DataFrame = self.dataframe(path) + df: dn.DataFrame = self.dataframe(path) + + if params.row_chunk_size is not None and params.row_chunk is not None: + offset: int = params.row_chunk * params.row_chunk_size + df = df.limit(count=params.row_chunk_size, offset=offset) - if params.per_chunk is not None and params.chunk is not None: - offset: int = params.chunk * params.per_chunk - df = df.limit(count=params.per_chunk, offset=offset) + if params.col_chunk_size is not None and params.col_chunk is not None: + col_names = df.schema().names + start: int = params.col_chunk * params.col_chunk_size + end: int = start + params.col_chunk_size + df = df.select(*col_names[start:end]) table: pa.Table = df.to_arrow_table() @@ -113,18 +114,23 @@ async def get(self, path: str) -> None: # No dedicated exception type coming from DataFusion if str(e).startswith("DataFusion"): first_col: str = schema.names[0] - batches = df.aggregate([], [fn.count(dtfn.col(first_col))]).collect() + batches = df.aggregate([], [dnf.count(dn.col(first_col))]).collect() num_rows = batches[0].column(0)[0].as_py() response = StatsResponse(num_cols=len(schema), num_rows=num_rows) await self.finish(dataclasses.asdict(response)) -def make_datafusion_config() -> dtfn.SessionConfig: +def make_datafusion_config() -> dn.SessionConfig: """Return the datafusion config.""" - config = dtfn.SessionConfig() - # String views do not get written properly to IPC - config.set("datafusion.execution.parquet.schema_force_view_types", "false") + config = ( + dn.SessionConfig() + # Must use a single partition otherwise limit parallelism will return arbitrary rows + .with_target_partitions(1) + # String views do not get written properly to IPC + .set("datafusion.execution.parquet.schema_force_view_types", "false") + ) + return config @@ -133,7 +139,7 @@ def setup_route_handlers(web_app: jupyter_server.serverapp.ServerWebApplication) host_pattern = ".*$" base_url = web_app.settings["base_url"] - context = dtfn.SessionContext(make_datafusion_config()) + context = dn.SessionContext(make_datafusion_config()) handlers = [ (url_path_join(base_url, r"arrow/stream/([^?]*)"), IpcRouteHandler, {"context": context}), diff --git a/arbalister/tests/test_routes.py b/arbalister/tests/test_routes.py index 2dae426..90c1cf0 100644 --- a/arbalister/tests/test_routes.py +++ b/arbalister/tests/test_routes.py @@ -1,6 +1,9 @@ +import dataclasses import json import pathlib -from typing import Awaitable, Callable +import random +import string +from typing import Awaitable, Callable, Final import pyarrow as pa import pytest @@ -9,21 +12,29 @@ import arbalister as arb -@pytest.fixture(params=list(arb.arrow.FileFormat)) +@pytest.fixture(params=list(arb.arrow.FileFormat), scope="session") def file_format(request: pytest.FixtureRequest) -> arb.arrow.FileFormat: """Parametrize the file format used in the test.""" out: arb.arrow.FileFormat = request.param return out -@pytest.fixture +DUMMY_TABLE_ROW_COUNT: Final = 10 +DUMMY_TABLE_COL_COUNT: Final = 4 + + +@pytest.fixture(scope="module") 
def dummy_table() -> pa.Table: """Generate a table with fake data.""" data = { - "letter": list("abcdefghij"), - "number": list(range(10)), + "lower": random.choices(string.ascii_lowercase, k=DUMMY_TABLE_ROW_COUNT), + "sequence": list(range(DUMMY_TABLE_ROW_COUNT)), + "upper": random.choices(string.ascii_uppercase, k=DUMMY_TABLE_ROW_COUNT), + "number": [random.random() for _ in range(DUMMY_TABLE_ROW_COUNT)], } - return pa.table(data) + table = pa.table(data) + assert len(table.schema) == DUMMY_TABLE_COL_COUNT + return table @pytest.fixture @@ -40,36 +51,75 @@ def dummy_table_file( JpFetch = Callable[..., Awaitable[tornado.httpclient.HTTPResponse]] -async def test_ipc_route(jp_fetch: JpFetch, dummy_table: pa.Table, dummy_table_file: pathlib.Path) -> None: - """Test fetching a file returns the correct data in IPC.""" - response = await jp_fetch("arrow/stream/", str(dummy_table_file)) - - assert response.code == 200 - assert response.headers["Content-Type"] == "application/vnd.apache.arrow.stream" - - payload = pa.ipc.open_stream(response.body).read_all() - assert dummy_table.num_rows == payload.num_rows - assert dummy_table.cast(payload.schema) == payload - - -async def test_ipc_route_limit( - jp_fetch: JpFetch, dummy_table: pa.Table, dummy_table_file: pathlib.Path +@pytest.mark.parametrize( + "params", + [ + arb.routes.IpcParams(), + # Limit only number of rows + arb.routes.IpcParams(row_chunk=0, row_chunk_size=3), + arb.routes.IpcParams(row_chunk=1, row_chunk_size=2), + arb.routes.IpcParams(row_chunk=0, row_chunk_size=DUMMY_TABLE_ROW_COUNT), + arb.routes.IpcParams(row_chunk=1, row_chunk_size=DUMMY_TABLE_ROW_COUNT // 2 + 1), + # Limit only number of cols + arb.routes.IpcParams(col_chunk=0, col_chunk_size=3), + arb.routes.IpcParams(col_chunk=1, col_chunk_size=2), + arb.routes.IpcParams(col_chunk=0, col_chunk_size=DUMMY_TABLE_COL_COUNT), + arb.routes.IpcParams(col_chunk=1, col_chunk_size=DUMMY_TABLE_COL_COUNT // 2 + 1), + # Limit both + arb.routes.IpcParams( + row_chunk=0, + row_chunk_size=3, + col_chunk=1, + col_chunk_size=DUMMY_TABLE_COL_COUNT // 2 + 1, + ), + arb.routes.IpcParams( + row_chunk=0, + row_chunk_size=DUMMY_TABLE_ROW_COUNT, + col_chunk=1, + col_chunk_size=2, + ), + # Schema only + arb.routes.IpcParams( + row_chunk=0, + row_chunk_size=0, + ), + ], +) +async def test_ipc_route_limit_row( + jp_fetch: JpFetch, + dummy_table: pa.Table, + dummy_table_file: pathlib.Path, + params: arb.routes.IpcParams, ) -> None: - """Test fetching a file returns the limited data in IPC.""" - num_rows = 2 - chunk = 1 + """Test fetching a file returns the limited rows and columns in IPC.""" response = await jp_fetch( "arrow/stream", str(dummy_table_file), - params={"per_chunk": num_rows, "chunk": chunk}, + params={k: v for k, v in dataclasses.asdict(params).items() if v is not None}, ) assert response.code == 200 assert response.headers["Content-Type"] == "application/vnd.apache.arrow.stream" - payload = pa.ipc.open_stream(response.body).read_all() - assert payload.num_rows == num_rows - assert dummy_table.slice(chunk * num_rows, num_rows).cast(payload.schema) == payload + + expected = dummy_table + + # Row slicing + if (size := params.row_chunk_size) is not None and (cidx := params.row_chunk) is not None: + expected_num_rows = min((size * (cidx + 1)), expected.num_rows) - (size * cidx) + assert payload.num_rows == expected_num_rows + expected = expected.slice(cidx * size, size) + + # Col slicing + if (size := params.col_chunk_size) is not None and (cidx := params.col_chunk) is not None: + 
expected_num_cols = min((size * (cidx + 1)), len(expected.schema)) - (size * cidx) + assert len(payload.schema) == expected_num_cols + col_names = expected.schema.names + start = cidx * size + end = start + size + expected = expected.select(col_names[start:end]) + + assert expected.cast(payload.schema) == payload async def test_stats_route(jp_fetch: JpFetch, dummy_table: pa.Table, dummy_table_file: pathlib.Path) -> None: diff --git a/data/generate.py b/data/generate.py index c4b7df6..b14e24e 100644 --- a/data/generate.py +++ b/data/generate.py @@ -2,26 +2,32 @@ import pathlib import random +import datafusion as dn +import datafusion.functions as dnf import faker import pyarrow as pa +import pyarrow.parquet as paq import arbalister.arrow as aa MAX_FAKER_ROWS = 100_000 -def widen(field: pa.Field) -> pa.Field: - """Adapt Arrow schema for large files.""" +def _widen_field(field: pa.Field) -> pa.Field: return pa.field(field.name, pa.large_string()) if pa.types.is_string(field.type) else field +def widen(table: pa.Table) -> pa.Table: + """Adapt Arrow schema for large files.""" + return table.cast(pa.schema([_widen_field(f) for f in table.schema])) + + def generate_table(num_rows: int) -> pa.Table: """Generate a table with fake data.""" if num_rows > MAX_FAKER_ROWS: - table = generate_table(MAX_FAKER_ROWS) - widened = table.cast(pa.schema([widen(f) for f in table.schema])) + table = widen(generate_table(MAX_FAKER_ROWS)) n_repeat = num_rows // MAX_FAKER_ROWS - large_table = pa.concat_tables([widened] * n_repeat, promote_options="default") + large_table = pa.concat_tables([table] * n_repeat, promote_options="default") return large_table.slice(0, num_rows) gen = faker.Faker() @@ -34,6 +40,57 @@ def generate_table(num_rows: int) -> pa.Table: return pa.table(data) +def _generate_coordinate_table_slice( + row_start: int, row_end: int, num_cols: int, ctx: dn.SessionContext +) -> pa.Table: + row_idx: pa.Array = pa.array(range(row_start, row_end), type=pa.int64()) + table = pa.table({"row": row_idx}) + table = ( + ctx.from_arrow(table) + .with_columns( + *[ + dnf.concat( + dn.lit("("), # type: ignore[no-untyped-call] + dnf.col("row"), + dn.lit(f", {j})"), # type: ignore[no-untyped-call] + ).alias(f"col_{j}") + for j in range(num_cols) + ] + ) + .drop("row") + .to_arrow_table() + ) + return widen(table) + + +def _sink_coordinate_table( + num_rows: int, + num_cols: int, + writer: paq.ParquetWriter, + ctx: dn.SessionContext, + chunk_size: int = 100_000, +) -> None: + for row_start in range(0, num_rows, chunk_size): + print(f"Generating {row_start}", flush=True) + row_end = min(row_start + chunk_size, num_rows) + table = _generate_coordinate_table_slice(row_start, row_end, num_cols=num_cols, ctx=ctx) + writer.write(table) + + +def sink_coordinate_table( + num_rows: int, num_cols: int, path: pathlib.Path, chunk_size: int = 1_000_000 +) -> None: + """Write iteratively a table where each cell has its coordinates.""" + assert aa.FileFormat.from_filename(path) == aa.FileFormat.Parquet + print("Initializing session context", flush=True) + ctx = dn.SessionContext() + print("Generating schema", flush=True) + schema = _generate_coordinate_table_slice(0, 1, num_cols=num_cols, ctx=ctx).schema + writer = paq.ParquetWriter(path, schema) + _sink_coordinate_table(num_rows=num_rows, num_cols=num_cols, writer=writer, ctx=ctx) + writer.close() + + def configure_command_single(cmd: argparse.ArgumentParser) -> argparse.ArgumentParser: """Configure single subcommand CLI options.""" cmd.add_argument("--output-file", "-o", 
type=pathlib.Path, required=True, help="Output file path") @@ -57,6 +114,16 @@ def configure_command_batch(cmd: argparse.ArgumentParser) -> argparse.ArgumentPa return cmd +def configure_command_coordinate(cmd: argparse.ArgumentParser) -> argparse.ArgumentParser: + """Configure coordinate subcommand CLI options.""" + cmd.add_argument( + "--output-file", "-o", type=pathlib.Path, required=True, help="Output file path, must be parquet." + ) + cmd.add_argument("--num-rows", type=int, default=1_000_000, help="Number of rows to generate") + cmd.add_argument("--num-cols", type=int, default=1000, help="Number of columns to generate") + return cmd + + def configure_argparse() -> argparse.ArgumentParser: """Configure CLI options.""" parser = argparse.ArgumentParser(description="Generate a table and write to file.") @@ -68,6 +135,9 @@ def configure_argparse() -> argparse.ArgumentParser: cmd_batch = subparsers.add_parser("batch", help="Generate a multiple tables with the same data.") configure_command_batch(cmd_batch) + cmd_batch = subparsers.add_parser("coordinate", help="Generate a coordinate table file.") + configure_command_coordinate(cmd_batch) + return parser @@ -103,6 +173,8 @@ def main() -> None: for p in args.output_file: ft = aa.FileFormat.from_filename(p) save_table(shuffle_table(table), p, ft) + case "coordinate": + sink_coordinate_table(num_rows=args.num_rows, num_cols=args.num_cols, path=args.output_file) if __name__ == "__main__": diff --git a/pixi.lock b/pixi.lock index 5fecd27..d5fd5a3 100644 --- a/pixi.lock +++ b/pixi.lock @@ -2570,7 +2570,7 @@ packages: - pypi: ./ name: arbalister version: 0.1.0 - sha256: 23c95ea4c9395af00dc733cedef7aed5a0023a18c5daa5a0f29d3eb0d4f3091b + sha256: b3c23e0901a5247469d7f00a61e13669ae82d954e1ae19f5074a2678a84efb23 requires_dist: - datafusion>=40.0 - jupyter-server>=2.4.0,<3 diff --git a/pyproject.toml b/pyproject.toml index 538b794..778a04f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -226,7 +226,10 @@ description = """Generate different data files to use in development in data/gen [tool.pixi.feature.dev.tasks.gen-data-large] cmd = """ -python data/generate.py single --num-rows 100_000_000 -o data/gen/large.parquet +python data/generate.py coordinate \ + --output-file data/gen/large.parquet \ + --num-rows 100_000_000 \ + --num-cols 100 \ """ outputs = ["data/gen/**/large.parquet"] description = """Generate a large parquet file.""" diff --git a/src/collection.ts b/src/collection.ts new file mode 100644 index 0000000..64f5968 --- /dev/null +++ b/src/collection.ts @@ -0,0 +1,38 @@ +export class PairMap<K1, K2, V> { + private map = new Map<string, V>(); + + set(key: [K1, K2], value: V): this { + this.map.set(JSON.stringify(key), value); + return this; + } + + get(key: [K1, K2]): V | undefined { + return this.map.get(JSON.stringify(key)); + } + + clear(): void { + this.map.clear(); + } + + delete(key: [K1, K2]): boolean { + return this.map.delete(JSON.stringify(key)); + } + + has(key: [K1, K2]): boolean { + return this.map.has(JSON.stringify(key)); + } + + get size(): number { + return this.map.size; + } + + forEach( + callbackfn: (value: V, key: [K1, K2], map: PairMap<K1, K2, V>) => void, + // biome-ignore lint/suspicious/noExplicitAny: This is in the Map signature + thisArg?: any, + ): void { + this.map.forEach((value, key) => { + callbackfn.call(thisArg, value, JSON.parse(key), this); + }); + } +} diff --git a/src/model.ts b/src/model.ts index 1a8e2d6..e6f5cb5 100644 --- a/src/model.ts +++ b/src/model.ts @@ -2,25 +2,39 @@ import { DataModel } from "@lumino/datagrid"; import type * as
Arrow from "apache-arrow"; +import { PairMap } from "./collection"; import { fetchStats, fetchTable } from "./requests"; -const CHUNK_ROW_COUNT = 1024; -const LOADING_REPR = ""; +export namespace ArrowModel { + export interface IOptions { + path: string; + rowChunkSize?: number; + colChunkSize?: number; + loadingRepr?: string; + } +} export class ArrowModel extends DataModel { - constructor(path: string) { + constructor(options: ArrowModel.IOptions) { super(); - this._path = path; + + this._path = options.path; + this._rowChunkSize = options.rowChunkSize ?? 512; + this._colChunkSize = options.colChunkSize ?? 24; + this._loadingRepr = options.loadingRepr ?? ""; + + this._ready = this.initialize(); } protected async initialize(): Promise<void> { - const [stats, chunk0] = await Promise.all([ + const [schema, stats, chunk00] = await Promise.all([ + this.fetchSchema(), fetchStats({ path: this._path }), - this.fetchChunk(0), + this.fetchChunk([0, 0]), ]); - this._chunks[0] = chunk0; + this._schema = schema; + this._chunks.set([0, 0], chunk00); this._numCols = stats.num_cols; this._numRows = stats.num_rows; } @@ -30,14 +44,7 @@ export class ArrowModel extends DataModel { } private get schema(): Arrow.Schema { - if (!this._chunks[0]) { - throw new Error("First chunk is null or undefined"); - } - const chunk = this._chunks[0]; - if (chunk instanceof Promise) { - throw new Error("schema is not an Arrow.Table"); - } - return chunk.schema; + return this._schema; } columnCount(region: DataModel.ColumnRegion): number { @@ -69,44 +76,69 @@ export class ArrowModel extends DataModel { } - private dataBody(row: number, column: number): string { - const chunk_idx: number = Math.floor(row / CHUNK_ROW_COUNT); + private dataBody(row: number, col: number): string { + const row_chunk: number = Math.floor(row / this._rowChunkSize); + const col_chunk: number = Math.floor(col / this._colChunkSize); + const chunk_idx: [number, number] = [row_chunk, col_chunk]; - if (chunk_idx in this._chunks) { - const chunk = this._chunks[chunk_idx]; + if (this._chunks.has(chunk_idx)) { + const chunk = this._chunks.get(chunk_idx)!; if (chunk instanceof Promise) { // Wait for Promise to complete and mark data as modified - return LOADING_REPR; + return this._loadingRepr; } // We have data - const chunk_row_idx = row % CHUNK_ROW_COUNT; - return chunk.getChildAt(column)?.get(chunk_row_idx).toString(); + const row_idx_in_chunk = row % this._rowChunkSize; + const col_idx_in_chunk = col % this._colChunkSize; + return chunk.getChildAt(col_idx_in_chunk)?.get(row_idx_in_chunk).toString(); } // Fetch data, however we cannot await it due to the interface required by the DataGrid. // Instead, we fire the request, and notify of change upon completion.
- this._chunks[chunk_idx] = this.fetchChunk(chunk_idx).then((table) => { - this._chunks[chunk_idx] = table; + const promise = this.fetchChunk(chunk_idx).then((table) => { + this._chunks.set(chunk_idx, table); this.emitChanged({ type: "cells-changed", region: "body", - row: chunk_idx * CHUNK_ROW_COUNT, - rowSpan: CHUNK_ROW_COUNT, - column: 0, - columnSpan: this._numCols, + row: row_chunk * this._rowChunkSize, + rowSpan: this._rowChunkSize, + column: col_chunk * this._colChunkSize, + columnSpan: this._colChunkSize, }); }); + this._chunks.set([row_chunk, col_chunk], promise); - return LOADING_REPR; + return this._loadingRepr; } - private async fetchChunk(chunk_idx: number) { - return await fetchTable({ path: this._path, per_chunk: CHUNK_ROW_COUNT, chunk: chunk_idx }); + private async fetchChunk(chunk_idx: [number, number]) { + const [row_chunk, col_chunk] = chunk_idx; + return await fetchTable({ + path: this._path, + row_chunk_size: this._rowChunkSize, + row_chunk: row_chunk, + col_chunk_size: this._colChunkSize, + col_chunk: col_chunk, + }); } + private async fetchSchema() { + const table = await fetchTable({ + path: this._path, + row_chunk_size: 0, + row_chunk: 0, + }); + return table.schema; + } + + private _path: string; + private _rowChunkSize: number; + private _colChunkSize: number; + private _loadingRepr: string; + private _numRows: number = 0; private _numCols: number = 0; - private _path: string; - private _chunks: { [key: number]: Arrow.Table | Promise<Arrow.Table> } = {}; + private _schema!: Arrow.Schema; + private _chunks: PairMap<number, number, Arrow.Table | Promise<Arrow.Table>> = new PairMap(); private _ready: Promise<void>; } diff --git a/src/requests.ts b/src/requests.ts index ddc35e1..d3f4cc9 100644 --- a/src/requests.ts +++ b/src/requests.ts @@ -18,17 +18,25 @@ export async function fetchStats(params: StatsParams): Promise { export interface TableParams { readonly path: string; - readonly per_chunk?: number; - readonly chunk?: number; + readonly row_chunk_size?: number; + readonly row_chunk?: number; + readonly col_chunk_size?: number; + readonly col_chunk?: number; } export async function fetchTable(params: TableParams): Promise<Arrow.Table> { const query: string[] = []; - if (params.per_chunk !== undefined) { - query.push(`per_chunk=${encodeURIComponent(params.per_chunk)}`); + if (params.row_chunk_size !== undefined) { + query.push(`row_chunk_size=${encodeURIComponent(params.row_chunk_size)}`); } - if (params.chunk !== undefined) { - query.push(`chunk=${encodeURIComponent(params.chunk)}`); + if (params.row_chunk !== undefined) { + query.push(`row_chunk=${encodeURIComponent(params.row_chunk)}`); + } + if (params.col_chunk_size !== undefined) { + query.push(`col_chunk_size=${encodeURIComponent(params.col_chunk_size)}`); + } + if (params.col_chunk !== undefined) { + query.push(`col_chunk=${encodeURIComponent(params.col_chunk)}`); } const queryString = query.length ? `?${query.join("&")}` : ""; const url = `/arrow/stream/${params.path}${queryString}`; diff --git a/src/widget.ts b/src/widget.ts index 815e2d8..6ed9669 100644 --- a/src/widget.ts +++ b/src/widget.ts @@ -73,7 +73,7 @@ export class ArrowGridViewer extends Panel { } private async _updateGrid() { - const model = new ArrowModel(this.path); + const model = new ArrowModel({ path: this.path }); await model.ready; this._grid.dataModel = model; }
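As a cross-check of the chunk arithmetic now shared by IpcRouteHandler on the server and ArrowModel on the client, here is a small standalone pyarrow sketch of what a given (row_chunk, col_chunk) request is expected to return; the table and chunk sizes are invented for illustration.

import pyarrow as pa

table = pa.table({f"col_{j}": list(range(10)) for j in range(4)})
row_chunk_size, row_chunk = 3, 1  # rows 3..5
col_chunk_size, col_chunk = 2, 1  # columns col_2, col_3

# Row slice: offset = row_chunk * row_chunk_size, length = row_chunk_size,
# mirroring df.limit(count=..., offset=...) in IpcRouteHandler.get.
rows = table.slice(row_chunk * row_chunk_size, row_chunk_size)

# Column slice: a contiguous run of column names, mirroring df.select(*names[start:end]).
start = col_chunk * col_chunk_size
chunk = rows.select(table.schema.names[start : start + col_chunk_size])

assert chunk.num_rows == 3
assert chunk.schema.names == ["col_2", "col_3"]

On the server the row slice goes through DataFusion's limit(count, offset), which is why make_datafusion_config now pins target partitions to 1: with several partitions the offset can select a nondeterministic set of rows.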
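The /arrow/stream responses are plain Arrow IPC streams, so a client needs nothing beyond pyarrow to decode them, exactly as the tests do with pa.ipc.open_stream(response.body).read_all(). A small sketch, where body stands for the raw response bytes (obtained via jp_fetch in the tests or fetch in the frontend):

import pyarrow as pa

def decode_ipc_stream(body: bytes) -> pa.Table:
    # Content-Type is application/vnd.apache.arrow.stream; the stream carries the
    # schema even when it contains zero record batches.
    return pa.ipc.open_stream(body).read_all()

A request with row_chunk_size=0 therefore returns an empty table whose schema is still populated, which is what ArrowModel.fetchSchema relies on to obtain column names without transferring any rows.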