diff --git a/arbalister/file_format.py b/arbalister/file_format.py index d2d687a..2e3beac 100644 --- a/arbalister/file_format.py +++ b/arbalister/file_format.py @@ -30,6 +30,6 @@ def from_filename(cls, file: pathlib.Path | str) -> Self: match file_type: case "ipc" | "feather": return cls.Ipc - case "sqlite3" | "db" | ".db3", ".s3db", ".sl3": + case "sqlite3" | "db" | "db3" | "s3db" | "sl3": return cls.Sqlite raise ValueError(f"Unknown file type {file_type}") diff --git a/src/__tests__/model.spec.ts b/src/__tests__/model.spec.ts index 8855a16..f6ad753 100644 --- a/src/__tests__/model.spec.ts +++ b/src/__tests__/model.spec.ts @@ -5,6 +5,7 @@ import type * as Arrow from "apache-arrow"; import { ArrowModel } from "../model"; import { fetchStats, fetchTable } from "../requests"; +import type { FileInfo, FileOptions } from "../file_options"; import type * as Req from "../requests"; const MOCK_TABLE = tableFromArrays({ @@ -52,7 +53,7 @@ describe("ArrowModel", () => { (fetchTable as jest.Mock).mockImplementation(fetchTableMocked); (fetchStats as jest.Mock).mockImplementation(fetchStatsMocked); - const model = new ArrowModel({ path: "test/path.parquet" }, {}); + const model = new ArrowModel({ path: "test/path.parquet" }, {} as FileOptions, {} as FileInfo); it("should initialize data", async () => { await model.ready; @@ -70,4 +71,18 @@ describe("ArrowModel", () => { // First chunk is initialized expect(model.data("body", 0, 0)).toEqual(MOCK_TABLE.getChildAt(0)?.get(0).toString()); }); + + it("should reinitialize when fileOptions is set", async () => { + const model2 = new ArrowModel({ path: "test/data.csv" }, {} as FileOptions, {} as FileInfo); + await model2.ready; + + const initialStatsCallCount = (fetchStats as jest.Mock).mock.calls.length; + const initialTableCallCount = (fetchTable as jest.Mock).mock.calls.length; + + model2.fileOptions = { delimiter: ";" } as FileOptions; + await model2.ready; + + expect(fetchStats).toHaveBeenCalledTimes(initialStatsCallCount + 1); + 
expect(fetchTable).toHaveBeenCalledTimes(initialTableCallCount + 1); + }); }); diff --git a/src/file_options.ts b/src/file_options.ts index cf81573..7b340f7 100644 --- a/src/file_options.ts +++ b/src/file_options.ts @@ -1,9 +1,19 @@ export interface CsvOptions { - delimiter?: string; + delimiter: string; } -export const DEFAULT_CSV_OPTIONS: Required = { - delimiter: ",", -}; +export interface SqliteOptions { + table_name: string; +} + +export type FileOptions = CsvOptions | SqliteOptions; + +export interface SqliteFileInfo { + table_names: string[]; +} + +export interface CsvFileInfo { + delimiters: string[]; +} -export type FileOptions = CsvOptions; +export type FileInfo = SqliteFileInfo | CsvFileInfo; diff --git a/src/model.ts b/src/model.ts index 8a758e2..5ab7bad 100644 --- a/src/model.ts +++ b/src/model.ts @@ -3,8 +3,8 @@ import { DataModel } from "@lumino/datagrid"; import type * as Arrow from "apache-arrow"; import { PairMap } from "./collection"; -import { fetchStats, fetchTable } from "./requests"; -import type { FileOptions } from "./file_options"; +import { fetchFileInfo, fetchStats, fetchTable } from "./requests"; +import type { FileInfo, FileOptions } from "./file_options"; export namespace ArrowModel { export interface LoadingOptions { @@ -16,7 +16,18 @@ export namespace ArrowModel { } export class ArrowModel extends DataModel { - constructor(loadingOptions: ArrowModel.LoadingOptions, fileOptions: FileOptions) { + static async fromRemoteFileInfo(loadingOptions: ArrowModel.LoadingOptions) { + const { info: fileInfo, read_params: fileOptions } = await fetchFileInfo({ + path: loadingOptions.path, + }); + return new ArrowModel(loadingOptions, fileOptions, fileInfo); + } + + constructor( + loadingOptions: ArrowModel.LoadingOptions, + fileOptions: FileOptions, + fileInfo: FileInfo, + ) { super(); this._loadingParams = { @@ -26,6 +37,7 @@ export class ArrowModel extends DataModel { ...loadingOptions, }; this._fileOptions = fileOptions; + this._fileInfo = 
fileInfo; this._ready = this.initialize(); } @@ -37,9 +49,25 @@ export class ArrowModel extends DataModel { ]); this._schema = stats.schema; - this._chunks.set([0, 0], chunk00); this._numCols = stats.num_cols; this._numRows = stats.num_rows; + this._chunks = new PairMap(); + this._chunks.set([0, 0], chunk00); + } + + get fileInfo(): Readonly { + return this._fileInfo; + } + + get fileOptions(): Readonly { + return this._fileOptions; + } + + set fileOptions(fileOptions: FileOptions) { + this._fileOptions = fileOptions; + this._ready = this.initialize().then(() => { + this.emitChanged({ type: "model-reset" }); + }); } get ready(): Promise { @@ -167,7 +195,8 @@ export class ArrowModel extends DataModel { } private readonly _loadingParams: Required; - private readonly _fileOptions: FileOptions; + private readonly _fileInfo: FileInfo; + private _fileOptions: FileOptions; private _numRows: number = 0; private _numCols: number = 0; diff --git a/src/requests.ts b/src/requests.ts index 668377d..42b9b62 100644 --- a/src/requests.ts +++ b/src/requests.ts @@ -1,7 +1,30 @@ import { tableFromIPC } from "apache-arrow"; import type * as Arrow from "apache-arrow"; -import type { FileOptions } from "./file_options"; +import type { FileInfo, FileOptions } from "./file_options"; + +export interface FileInfoOptions { + path: string; +} + +export interface FileInfoResponse { + info: FileInfo; + read_params: FileOptions; +} + +/** + * Fetch the file info (`info`) and read parameters (`read_params`) for `params.path` from `/file/info/`. + * + * @throws Error when the response status is not OK, since `fetch` itself resolves on HTTP error statuses. + */ +export async function fetchFileInfo(params: Readonly): Promise { + const response = await fetch(`/file/info/${params.path}`); + if (!response.ok) { + throw new Error(`Failed to fetch file info for ${params.path}: ${response.status}`); + } + const data: FileInfoResponse = await response.json(); + return data; +} export interface StatsOptions { path: string; @@ -35,14 +58,14 @@ type OptionalizeUnion = { export async function fetchStats( params: Readonly, ): Promise { - const queryKeys = ["path", "delimiter"] as const; + const queryKeys = ["path", "delimiter", "table_name"] as const; const query = new 
URLSearchParams(); for (const key of queryKeys) { const value = (params as Readonly & OptionalizeUnion)[key]; if (value !== undefined && value != null) { query.set(key, value.toString()); } } @@ -95,6 +118,7 @@ export async function fetchTable( "col_chunk_size", "col_chunk", "delimiter", + "table_name", ] as const; const query = new URLSearchParams(); diff --git a/src/toolbar.ts b/src/toolbar.ts index c04e3b8..eba332f 100644 --- a/src/toolbar.ts +++ b/src/toolbar.ts @@ -7,32 +7,30 @@ import { Widget } from "@lumino/widgets"; import type { ITranslator } from "@jupyterlab/translation"; import type { Message } from "@lumino/messaging"; -import type { CsvOptions } from "./file_options"; +import { FileType } from "./filetypes"; +import type { + CsvFileInfo, + CsvOptions, + FileInfo, + FileOptions, + SqliteFileInfo, + SqliteOptions, +} from "./file_options"; import type { ArrowGridViewer } from "./widget"; -export namespace CsvToolbar { - export interface Options { - gridViewer: ArrowGridViewer; - translator?: ITranslator; - } -} - -export class CsvToolbar extends Widget { - constructor(options: CsvToolbar.Options, fileOptions: Required) { - super({ - node: Private.createDelimiterNode(fileOptions.delimiter, options.translator), - }); - this._gridViewer = options.gridViewer; +/** + * Base toolbar class for file-specific options with a dropdown selector. 
+ */ +abstract class DropdownToolbar extends Widget { + constructor(gridViewer: ArrowGridViewer, node: HTMLElement) { + super({ node }); + this._gridViewer = gridViewer; this.addClass("arrow-viewer-toolbar"); } - get fileOptions(): CsvOptions { - return { - delimiter: this.delimiterNode.value, - }; - } + abstract get fileOptions(): FileOptions; - get delimiterNode(): HTMLSelectElement { + get selectNode(): HTMLSelectElement { return this.node.getElementsByTagName("select")![0]; } @@ -57,43 +55,147 @@ export class CsvToolbar extends Widget { } protected onAfterAttach(_msg: Message): void { - this.delimiterNode.addEventListener("change", this); + this.selectNode.addEventListener("change", this); } protected onBeforeDetach(_msg: Message): void { - this.delimiterNode.removeEventListener("change", this); + this.selectNode.removeEventListener("change", this); } protected _gridViewer: ArrowGridViewer; } +export namespace CsvToolbar { + export interface Options { + gridViewer: ArrowGridViewer; + translator?: ITranslator; + } +} + +export class CsvToolbar extends DropdownToolbar { + constructor(options: CsvToolbar.Options, fileOptions: CsvOptions, fileInfo: CsvFileInfo) { + super( + options.gridViewer, + Private.createDelimiterNode(fileOptions.delimiter, fileInfo.delimiters, options.translator), + ); + } + + get fileOptions(): CsvOptions { + return { + delimiter: this.selectNode.value, + }; + } +} + +export namespace SqliteToolbar { + export interface Options { + gridViewer: ArrowGridViewer; + translator?: ITranslator; + } +} + +export class SqliteToolbar extends DropdownToolbar { + constructor( + options: SqliteToolbar.Options, + fileOptions: SqliteOptions, + fileInfo: SqliteFileInfo, + ) { + super( + options.gridViewer, + Private.createTableNameNode(fileOptions.table_name, fileInfo.table_names, options.translator), + ); + } + + get fileOptions(): SqliteOptions { + return { + table_name: this.selectNode.value, + }; + } +} + +/** + * Common options for toolbar creation. 
+ */ +export interface ToolbarOptions { + gridViewer: ArrowGridViewer; + translator?: ITranslator; +} + +/** + * Factory function to create the appropriate toolbar for a given file type. + */ +export function createToolbar( + fileType: FileType, + options: ToolbarOptions, + fileOptions: FileOptions, + fileInfo: FileInfo, +): Widget | null { + switch (fileType) { + case FileType.Csv: + return new CsvToolbar(options, fileOptions as CsvOptions, fileInfo as CsvFileInfo); + case FileType.Sqlite: + return new SqliteToolbar(options, fileOptions as SqliteOptions, fileInfo as SqliteFileInfo); + default: + return null; + } +} + namespace Private { /** - * Create the node for the delimiter switcher. + * Create a labeled dropdown node with items. */ - export function createDelimiterNode(selected: string, translator?: ITranslator): HTMLElement { + function createLabeledDropdown( + label: string, + items: string[], + selected: string, + translator?: ITranslator, + ): HTMLElement { translator = translator || nullTranslator; const trans = translator?.load("jupyterlab"); + const options: [string, string][] = items.map((item) => [item, item]); + return createDropdownNode(trans.__(label), options, selected); + } + + /** + * Create the node for the delimiter switcher. + */ + export function createDelimiterNode( + selected: string, + delimiters: string[], + translator?: ITranslator, + ): HTMLElement { + return createLabeledDropdown("Delimiter: ", delimiters, selected, translator); + } - // The supported parsing delimiters and labels. - const delimiters = [ - [",", ","], - [";", ";"], - ["\\t", trans.__("tab")], - ["|", trans.__("pipe")], - ["#", trans.__("hash")], - ]; + /** + * Create the node for the table name switcher. 
+ */ + export function createTableNameNode( + selected: string, + table_names: string[], + translator?: ITranslator, + ): HTMLElement { + return createLabeledDropdown("Table: ", table_names, selected, translator); + } + /** + * Create a generic dropdown node with a label and options. + */ + function createDropdownNode( + labelText: string, + options: Array<[string, string]>, + selected: string, + ): HTMLElement { const div = document.createElement("div"); const label = document.createElement("span"); const select = document.createElement("select"); - label.textContent = trans.__("Delimiter: "); + label.textContent = labelText; label.className = "toolbar-label"; - for (const [delimiter, label] of delimiters) { + for (const [value, displayLabel] of options) { const option = document.createElement("option"); - option.value = delimiter; - option.textContent = label; - if (delimiter === selected) { + option.value = value; + option.textContent = displayLabel; + if (value === selected) { option.selected = true; } select.appendChild(option); diff --git a/src/widget.ts b/src/widget.ts index fcbf6b7..388387c 100644 --- a/src/widget.ts +++ b/src/widget.ts @@ -12,11 +12,10 @@ import { Panel } from "@lumino/widgets"; import type { DocumentRegistry, IDocumentWidget } from "@jupyterlab/docregistry"; import type * as DataGridModule from "@lumino/datagrid"; -import { DEFAULT_CSV_OPTIONS } from "./file_options"; import { FileType } from "./filetypes"; import { ArrowModel } from "./model"; -import { CsvToolbar } from "./toolbar"; -import type { FileOptions } from "./file_options"; +import { createToolbar } from "./toolbar"; +import type { FileInfo, FileOptions } from "./file_options"; export namespace ArrowGridViewer { export interface Options { @@ -25,10 +24,9 @@ export namespace ArrowGridViewer { } export class ArrowGridViewer extends Panel { - constructor(options: ArrowGridViewer.Options, fileOptions: FileOptions) { + constructor(options: ArrowGridViewer.Options) { super(); 
this._options = options; - this._fileOptions = fileOptions; this.addClass("arrow-viewer"); @@ -68,13 +66,20 @@ export class ArrowGridViewer extends Panel { return this._options.path; } + private get dataModel(): ArrowModel { + return this._grid.dataModel as ArrowModel; + } + + get fileInfo(): Readonly { + return this.dataModel.fileInfo; + } + get fileOptions(): Readonly { - return this._fileOptions; + return this.dataModel.fileOptions; } set fileOptions(fileOptions: FileOptions) { - this._fileOptions = fileOptions; - this._updateGrid(); + this.dataModel.fileOptions = fileOptions; } updateFileOptions(fileOptionsUpdate: Partial) { @@ -111,7 +116,7 @@ export class ArrowGridViewer extends Panel { private async _updateGrid() { try { - const dataModel = new ArrowModel({ path: this.path }, this.fileOptions); + const dataModel = await ArrowModel.fromRemoteFileInfo({ path: this.path }); await dataModel.ready; this._grid.dataModel = dataModel; this._grid.selectionModel = new BasicSelectionModel({ dataModel }); @@ -153,7 +158,6 @@ export class ArrowGridViewer extends Panel { } private _options: ArrowGridViewer.Options; - private _fileOptions: FileOptions; private _grid: DataGridModule.DataGrid; private _revealed = new PromiseDelegate(); private _ready: Promise; @@ -168,19 +172,16 @@ export namespace ArrowGridDocumentWidget { } export class ArrowGridDocumentWidget extends DocumentWidget { - constructor(options: ArrowGridDocumentWidget.IOptions, fileOptions: FileOptions) { + constructor(options: ArrowGridDocumentWidget.IOptions) { let { content, context, reveal, ...other } = options; - content = content || ArrowGridDocumentWidget._createContent(context, fileOptions); - reveal = Promise.all([reveal, content.revealed, context.ready]); + content = content || ArrowGridDocumentWidget._createContent(context.path); + reveal = Promise.all([reveal, content.ready, content.revealed, context.ready]); super({ content, context, reveal, ...other }); this.addClass("arrow-viewer-base"); } - 
private static _createContent( - context: DocumentRegistry.IContext, - fileOptions: FileOptions, - ): ArrowGridViewer { - return new ArrowGridViewer({ path: context.path }, fileOptions); + private static _createContent(path: string): ArrowGridViewer { + return new ArrowGridViewer({ path }); } } @@ -195,39 +196,30 @@ export class ArrowGridViewerFactory extends ABCWidgetFactory { const translator = this.translator; - const ft = this.fileType(context.path); - - let fileOption: FileOptions = {}; - if (ft?.name === FileType.Csv) { - fileOption = DEFAULT_CSV_OPTIONS; - } - const widget = new ArrowGridDocumentWidget({ context, translator }, fileOption); + const widget = new ArrowGridDocumentWidget({ context, translator }); this.updateIcon(widget); + widget.content.ready.then(() => { + this.makeToolbarItems(widget.content).forEach(({ widget: toolbarItem, name }) => { + widget.toolbar.addItem(name, toolbarItem); + }); + }); return widget; } - /** - * Default factory for toolbar items to be added after the widget is created. - */ - protected defaultToolbarFactory( - widget: IDocumentWidget, - ): DocumentRegistry.IToolbarItem[] { - const ft = this.fileType(widget.context.path); - if (ft?.name === FileType.Csv) { - return [ - { - name: "arbalister:csv-toolbar", - widget: new CsvToolbar( - { - gridViewer: widget.content, - translator: this.translator, - }, - DEFAULT_CSV_OPTIONS, - ), - }, - ]; + protected makeToolbarItems(gridViewer: ArrowGridViewer): DocumentRegistry.IToolbarItem[] { + const ft = this.fileType(gridViewer.path); + if (!ft) { + return []; } - return []; + + const toolbar = createToolbar( + ft.name as FileType, + { gridViewer, translator: this.translator }, + gridViewer.fileOptions, + gridViewer.fileInfo, + ); + + return toolbar ? [{ name: `arbalister:${ft.name}-toolbar`, widget: toolbar }] : []; } updateIcon(widget: IDocumentWidget) {