diff --git a/arbalister/routes.py b/arbalister/routes.py index 2f6ebc2..8b0d2ea 100644 --- a/arbalister/routes.py +++ b/arbalister/routes.py @@ -22,20 +22,20 @@ class Empty: @dataclasses.dataclass(frozen=True, slots=True) -class SqliteOptions: +class SqliteReadOptions: """Query parameter for the Sqlite reader.""" table_name: str | None = None @dataclasses.dataclass(frozen=True, slots=True) -class CsvOptions: +class CsvReadOptions: """Query parameter for the CSV reader.""" delimiter: str | None = "," -FileOptions = SqliteOptions | CsvOptions | Empty +FileReadOptions = SqliteReadOptions | CsvReadOptions | Empty class BaseRouteHandler(jupyter_server.base.handlers.APIHandler): @@ -58,7 +58,7 @@ def dataframe(self, path: str) -> dn.DataFrame: """ file = self.data_file(path) file_format = ff.FileFormat.from_filename(file) - file_params = self.get_file_read_params(file_format) + file_params = self.get_file_options(file_format) read_table = abw.get_table_reader(format=file_format) return read_table(self.context, file, **dataclasses.asdict(file_params)) @@ -66,13 +66,13 @@ def get_query_params_as[T](self, dataclass_type: type[T]) -> T: """Extract query parameters into a dataclass type.""" return params.build_dataclass(dataclass_type, self.get_query_argument) - def get_file_read_params(self, file_format: ff.FileFormat) -> FileOptions: + def get_file_options(self, file_format: ff.FileFormat) -> FileReadOptions: """Read the parameters associated with the relevant file format.""" match file_format: case ff.FileFormat.Sqlite: - return self.get_query_params_as(SqliteOptions) + return self.get_query_params_as(SqliteReadOptions) case ff.FileFormat.Csv: - return self.get_query_params_as(CsvOptions) + return self.get_query_params_as(CsvReadOptions) return Empty() @@ -194,7 +194,7 @@ class CsvFileInfo: delimiters: list[str] = dataclasses.field(default_factory=lambda: [",", ";", "\\t", "|", "#"]) -FileInfo = SqliteFileInfo +FileInfo = SqliteFileInfo | CsvFileInfo @dataclasses.dataclass(frozen=True, slots=True) @@ -202,11 +202,11 @@ class FileInfoResponse[I, P]: """File-specific information and defaults returned in the file info route.""" info: I - read_params: P + default_options: P -CsvFileInfoResponse = FileInfoResponse[CsvFileInfo, CsvOptions] -SqliteFileInfoResponse = FileInfoResponse[SqliteFileInfo, SqliteOptions] +CsvFileInfoResponse = FileInfoResponse[CsvFileInfo, CsvReadOptions] +SqliteFileInfoResponse = FileInfoResponse[SqliteFileInfo, SqliteReadOptions] NoFileInfoResponse = FileInfoResponse[Empty, Empty] @@ -225,7 +225,7 @@ async def get(self, path: str) -> None: info = CsvFileInfo() csv_response = CsvFileInfoResponse( info=info, - read_params=CsvOptions(delimiter=info.delimiters[0]), + default_options=CsvReadOptions(delimiter=info.delimiters[0]), ) await self.finish(dataclasses.asdict(csv_response)) case ff.FileFormat.Sqlite: @@ -235,11 +235,11 @@ async def get(self, path: str) -> None: sqlite_response = SqliteFileInfoResponse( info=SqliteFileInfo(table_names=table_names), - read_params=SqliteOptions(table_name=table_names[0]), + default_options=SqliteReadOptions(table_name=table_names[0]), ) await self.finish(dataclasses.asdict(sqlite_response)) case _: - no_response = NoFileInfoResponse(info=Empty(), read_params=Empty()) + no_response = NoFileInfoResponse(info=Empty(), default_options=Empty()) await self.finish(dataclasses.asdict(no_response)) diff --git a/arbalister/tests/test_routes.py b/arbalister/tests/test_routes.py index 8fdb8f3..a40e277 100644 --- a/arbalister/tests/test_routes.py +++ b/arbalister/tests/test_routes.py @@ -18,36 +18,38 @@ params=[ (ff.FileFormat.Avro, arb.routes.Empty()), (ff.FileFormat.Csv, arb.routes.Empty()), - (ff.FileFormat.Csv, arb.routes.CsvOptions(delimiter=";")), + (ff.FileFormat.Csv, arb.routes.CsvReadOptions(delimiter=";")), (ff.FileFormat.Ipc, arb.routes.Empty()), (ff.FileFormat.Orc, arb.routes.Empty()), (ff.FileFormat.Parquet, arb.routes.Empty()), (ff.FileFormat.Sqlite, arb.routes.Empty()), - (ff.FileFormat.Sqlite, arb.routes.SqliteOptions(table_name="dummy_table_2")), + (ff.FileFormat.Sqlite, arb.routes.SqliteReadOptions(table_name="dummy_table_2")), ], ids=lambda f_p: f"{f_p[0].value}-{dataclasses.asdict(f_p[1])}", scope="module", ) -def file_format_and_params(request: pytest.FixtureRequest) -> tuple[ff.FileFormat, arb.routes.FileOptions]: +def file_format_and_params( + request: pytest.FixtureRequest, +) -> tuple[ff.FileFormat, arb.routes.FileReadOptions]: """Parametrize the file format and file parameters used in the tests. This is used to to build test cases with a give set of parameters since each file format may be tested with a different number of parameters. """ - out: tuple[ff.FileFormat, arb.routes.FileOptions] = request.param + out: tuple[ff.FileFormat, arb.routes.FileReadOptions] = request.param return out @pytest.fixture(scope="module") -def file_format(file_format_and_params: tuple[ff.FileFormat, arb.routes.FileOptions]) -> ff.FileFormat: +def file_format(file_format_and_params: tuple[ff.FileFormat, arb.routes.FileReadOptions]) -> ff.FileFormat: """Extract the the file format fixture value used in the tests.""" return file_format_and_params[0] @pytest.fixture(scope="module") def file_params( - file_format_and_params: tuple[ff.FileFormat, arb.routes.FileOptions], -) -> arb.routes.FileOptions: + file_format_and_params: tuple[ff.FileFormat, arb.routes.FileReadOptions], +) -> arb.routes.FileReadOptions: """Extract the the file parameters fixture value used in the tests.""" return file_format_and_params[1] @@ -82,7 +84,7 @@ def dummy_table_2(num_rows: int = 13) -> pa.Table: @pytest.fixture(scope="module") def full_table(file_params: ff.FileFormat, dummy_table_1: pa.Table, dummy_table_2: pa.Table) -> pa.Table: """Return the full table on which we are executed queries.""" - if isinstance(file_params, arb.routes.SqliteOptions): + if isinstance(file_params, arb.routes.SqliteReadOptions): return { "dummy_table_1": dummy_table_1, "dummy_table_2": dummy_table_2, @@ -96,7 +98,7 @@ def table_file( dummy_table_1: pa.Table, dummy_table_2: pa.Table, file_format: ff.FileFormat, - file_params: arb.routes.FileOptions, + file_params: arb.routes.FileReadOptions, ) -> pathlib.Path: """Write the dummy table to file.""" write_table = arb.arrow.get_table_writer(file_format) @@ -162,7 +164,7 @@ async def test_ipc_route_limit( full_table: pa.Table, table_file: pathlib.Path, ipc_params: arb.routes.IpcParams, - file_params: arb.routes.SqliteOptions, + file_params: arb.routes.FileReadOptions, file_format: ff.FileFormat, ) -> None: """Test fetching a file returns the limited rows and columns in IPC.""" @@ -204,7 +206,7 @@ async def test_stats_route( jp_fetch: JpFetch, full_table: pa.Table, table_file: pathlib.Path, - file_params: arb.routes.SqliteOptions, + file_params: arb.routes.FileReadOptions, file_format: ff.FileFormat, ) -> None: """Test fetching a file returns the correct metadata in Json.""" @@ -242,15 +244,15 @@ async def test_file_info_route_sqlite( payload = json.loads(response.body) info = payload["info"] - read_params = payload["read_params"] + default_options = payload["default_options"] match file_format: case ff.FileFormat.Csv: assert isinstance(info["delimiters"], list) assert "," in info["delimiters"] - assert read_params["delimiter"] == info["delimiters"][0] + assert default_options["delimiter"] == info["delimiters"][0] case ff.FileFormat.Sqlite: assert isinstance(info["table_names"], list) assert "dummy_table_1" in info["table_names"] assert "dummy_table_2" in info["table_names"] - assert read_params["table_name"] == info["table_names"][0] + assert default_options["table_name"] == info["table_names"][0] diff --git a/pixi.lock b/pixi.lock index 8701b1b..1efa730 100644 --- a/pixi.lock +++ b/pixi.lock @@ -2701,7 +2701,7 @@ packages: - pypi: ./ name: arbalister version: 0.1.0 - sha256: ce4d4757fb419f03561df3a34b59621a12b87b932ee7b2894bcf31a4281c667a + sha256: 561d9bb11700477f055763e68e78504e6067f570dc99c6af5cbee2e6fda1971e requires_dist: - adbc-driver-manager - adbc-driver-sqlite diff --git a/pyproject.toml b/pyproject.toml index 2565ae0..64d1820 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -188,7 +188,7 @@ cmd = "biome check --write --error-on-warnings" description = "Check JS/TS/CSS code with biome." [tool.pixi.feature.test.tasks.check-typescript] -cmd = "npx --no tsc --noEmit" +cmd = "npx --no tsc --noEmit && echo Typescript OK" description = "Check TS code with typescript." [tool.pixi.feature.test.tasks.check-typos] diff --git a/src/__tests__/model.spec.ts b/src/__tests__/model.spec.ts index f6ad753..4b84eb2 100644 --- a/src/__tests__/model.spec.ts +++ b/src/__tests__/model.spec.ts @@ -5,7 +5,7 @@ import type * as Arrow from "apache-arrow"; import { ArrowModel } from "../model"; import { fetchStats, fetchTable } from "../requests"; -import type { FileInfo, FileOptions } from "../file_options"; +import type { FileInfo, FileReadOptions } from "../file-options"; import type * as Req from "../requests"; const MOCK_TABLE = tableFromArrays({ @@ -53,7 +53,11 @@ describe("ArrowModel", () => { (fetchTable as jest.Mock).mockImplementation(fetchTableMocked); (fetchStats as jest.Mock).mockImplementation(fetchStatsMocked); - const model = new ArrowModel({ path: "test/path.parquet" }, {} as FileOptions, {} as FileInfo); + const model = new ArrowModel( + { path: "test/path.parquet" }, + {} as FileReadOptions, + {} as FileInfo, + ); it("should initialize data", async () => { await model.ready; @@ -73,13 +77,13 @@ describe("ArrowModel", () => { }); it("should reinitialize when fileOptions is set", async () => { - const model2 = new ArrowModel({ path: "test/data.csv" }, {} as FileOptions, {} as FileInfo); + const model2 = new ArrowModel({ path: "test/data.csv" }, {} as FileReadOptions, {} as FileInfo); await model2.ready; const initialStatsCallCount = (fetchStats as jest.Mock).mock.calls.length; const initialTableCallCount = (fetchTable as jest.Mock).mock.calls.length; - model2.fileOptions = { delimiter: ";" } as FileOptions; + model2.fileReadOptions = { delimiter: ";" } as FileReadOptions; await model2.ready; expect(fetchStats).toHaveBeenCalledTimes(initialStatsCallCount + 1); diff --git a/src/file-options.ts b/src/file-options.ts new file mode 100644 index 0000000..24b916f --- /dev/null +++ b/src/file-options.ts @@ -0,0 +1,56 @@ +import { FileType } from "./file-types"; + +export interface CsvReadOptions { + delimiter: string; +} + +export interface SqliteReadOptions { + table_name: string; +} + +export interface CsvFileInfo { + delimiters: string[]; +} + +export interface SqliteFileInfo { + table_names: string[]; +} + +/** + * Central registry mapping FileType to its related types. + * This ensures type safety when working with file-type-specific data. + */ +interface FileTypeRegistry { + [FileType.Csv]: { + readOptions: CsvReadOptions; + info: CsvFileInfo; + }; + [FileType.Sqlite]: { + readOptions: SqliteReadOptions; + info: SqliteFileInfo; + }; +} + +/** + * Extract the options type for a specific FileType. + */ +export type FileReadOptionsFor = T extends keyof FileTypeRegistry + ? FileTypeRegistry[T]["readOptions"] + : never; + +/** + * Extract the info type for a specific FileType. + */ +export type FileInfoFor = T extends keyof FileTypeRegistry + ? FileTypeRegistry[T]["info"] + : never; + +/** + * Union of all possible file options. + */ +export type FileReadOptions = FileTypeRegistry[keyof FileTypeRegistry]["readOptions"]; + +/** + * Union of all possible file info. + */ +export type FileInfo = FileTypeRegistry[keyof FileTypeRegistry]["info"]; diff --git a/src/filetypes.ts b/src/file-types.ts similarity index 100% rename from src/filetypes.ts rename to src/file-types.ts diff --git a/src/file_options.ts b/src/file_options.ts deleted file mode 100644 index 7b340f7..0000000 --- a/src/file_options.ts +++ /dev/null @@ -1,19 +0,0 @@ -export interface CsvOptions { - delimiter: string; -} - -export interface SqliteOptions { - table_name: string; -} - -export type FileOptions = CsvOptions | SqliteOptions; - -export interface SqliteFileInfo { - table_names: string[]; -} - -export interface CsvFileInfo { - delimiters: string[]; -} - -export type FileInfo = SqliteFileInfo | CsvFileInfo; diff --git a/src/index.ts b/src/index.ts index 9eac098..0019e66 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,7 +8,7 @@ import type * as services from "@jupyterlab/services"; import type { Contents } from "@jupyterlab/services"; import type { DataGrid } from "@lumino/datagrid"; -import { ensureFileType, FileType, updateIcon } from "./filetypes"; +import { ensureFileType, FileType, updateIcon } from "./file-types"; import { ArrowGridViewerFactory } from "./widget"; import type { ArrowGridViewer, ITextRenderConfig } from "./widget"; diff --git a/src/model.ts b/src/model.ts index 5ab7bad..1a9b3da 100644 --- a/src/model.ts +++ b/src/model.ts @@ -4,7 +4,7 @@ import type * as Arrow from "apache-arrow"; import { PairMap } from "./collection"; import { fetchFileInfo, fetchStats, fetchTable } from "./requests"; -import type { FileInfo, FileOptions } from "./file_options"; +import type { FileInfo, FileReadOptions } from "./file-options"; export namespace ArrowModel { export interface LoadingOptions { @@ -17,7 +17,7 @@ export namespace ArrowModel { export class ArrowModel extends DataModel { static async fromRemoteFileInfo(loadingOptions: ArrowModel.LoadingOptions) { - const { info: fileInfo, read_params: fileOptions } = await fetchFileInfo({ + const { info: fileInfo, default_options: fileOptions } = await fetchFileInfo({ path: loadingOptions.path, }); return new ArrowModel(loadingOptions, fileOptions, fileInfo); @@ -25,7 +25,7 @@ export class ArrowModel extends DataModel { constructor( loadingOptions: ArrowModel.LoadingOptions, - fileOptions: FileOptions, + fileOptions: FileReadOptions, fileInfo: FileInfo, ) { super(); @@ -59,11 +59,11 @@ export class ArrowModel extends DataModel { return this._fileInfo; } - get fileOptions(): Readonly { + get fileReadOptions(): Readonly { return this._fileOptions; } - set fileOptions(fileOptions: FileOptions) { + set fileReadOptions(fileOptions: FileReadOptions) { this._fileOptions = fileOptions; this._ready = this.initialize().then(() => { this.emitChanged({ type: "model-reset" }); @@ -196,7 +196,7 @@ export class ArrowModel extends DataModel { private readonly _loadingParams: Required; private readonly _fileInfo: FileInfo; - private _fileOptions: FileOptions; + private _fileOptions: FileReadOptions; private _numRows: number = 0; private _numCols: number = 0; diff --git a/src/requests.ts b/src/requests.ts index 42b9b62..344a8ea 100644 --- a/src/requests.ts +++ b/src/requests.ts @@ -1,15 +1,27 @@ import { tableFromIPC } from "apache-arrow"; import type * as Arrow from "apache-arrow"; -import type { FileInfo, FileOptions } from "./file_options"; +import type { FileInfo, FileInfoFor, FileReadOptions, FileReadOptionsFor } from "./file-options"; +import type { FileType } from "./file-types"; export interface FileInfoOptions { path: string; } +/** + * Type-safe file info response for a specific file type. + */ +export interface FileInfoResponseFor { + info: FileInfoFor; + default_options: FileReadOptionsFor; +} + +/** + * Generic file info response (union of all file types). + */ export interface FileInfoResponse { info: FileInfo; - read_params: FileOptions; + default_options: FileReadOptions; } export async function fetchFileInfo(params: Readonly): Promise { @@ -22,6 +34,11 @@ export interface StatsOptions { path: string; } +/** + * Type-safe stats options for a specific file type. + */ +export type StatsOptionsFor = StatsOptions & FileReadOptionsFor; + interface SchemaInfo { data: string; mimetype: string; @@ -48,7 +65,7 @@ type OptionalizeUnion = { }; export async function fetchStats( - params: Readonly, + params: Readonly, ): Promise { const queryKeys = ["path", "delimiter", "table_name"] as const; const queryKeyMap: Record = { @@ -58,7 +75,7 @@ export async function fetchStats( const query = new URLSearchParams(); for (const key of queryKeys) { - const value = (params as Readonly & OptionalizeUnion)[key]; + const value = (params as Readonly & OptionalizeUnion)[key]; if (value !== undefined && value != null) { const queryKey = queryKeyMap[key] || key; query.set(queryKey, value.toString()); @@ -105,8 +122,13 @@ export interface TableOptions { col_chunk?: number; } +/** + * Type-safe table options for a specific file type. + */ +export type TableOptionsFor = TableOptions & FileReadOptionsFor; + export async function fetchTable( - params: Readonly, + params: Readonly, ): Promise { const queryKeys = [ "row_chunk_size", @@ -120,7 +142,7 @@ export async function fetchTable( const query = new URLSearchParams(); for (const key of queryKeys) { - const value = (params as Readonly & OptionalizeUnion)[key]; + const value = (params as Readonly & OptionalizeUnion)[key]; if (value !== undefined && value != null) { query.set(key, value.toString()); } diff --git a/src/toolbar.ts b/src/toolbar.ts index eba332f..07cb9a3 100644 --- a/src/toolbar.ts +++ b/src/toolbar.ts @@ -7,15 +7,16 @@ import { Widget } from "@lumino/widgets"; import type { ITranslator } from "@jupyterlab/translation"; import type { Message } from "@lumino/messaging"; -import { FileType } from "./filetypes"; +import { FileType } from "./file-types"; import type { CsvFileInfo, - CsvOptions, - FileInfo, - FileOptions, + CsvReadOptions, + FileInfoFor, + FileReadOptions, + FileReadOptionsFor, SqliteFileInfo, - SqliteOptions, -} from "./file_options"; + SqliteReadOptions, +} from "./file-options"; import type { ArrowGridViewer } from "./widget"; /** @@ -28,7 +29,7 @@ abstract class DropdownToolbar extends Widget { this.addClass("arrow-viewer-toolbar"); } - abstract get fileOptions(): FileOptions; + abstract get fileOptions(): FileReadOptions; get selectNode(): HTMLSelectElement { return this.node.getElementsByTagName("select")![0]; @@ -47,7 +48,7 @@ abstract class DropdownToolbar extends Widget { handleEvent(event: Event): void { switch (event.type) { case "change": - this._gridViewer.updateFileOptions(this.fileOptions); + this._gridViewer.updateFileReadOptions(this.fileOptions); break; default: break; @@ -73,14 +74,14 @@ export namespace CsvToolbar { } export class CsvToolbar extends DropdownToolbar { - constructor(options: CsvToolbar.Options, fileOptions: CsvOptions, fileInfo: CsvFileInfo) { + constructor(options: CsvToolbar.Options, fileOptions: CsvReadOptions, fileInfo: CsvFileInfo) { super( options.gridViewer, Private.createDelimiterNode(fileOptions.delimiter, fileInfo.delimiters, options.translator), ); } - get fileOptions(): CsvOptions { + get fileOptions(): CsvReadOptions { return { delimiter: this.selectNode.value, }; @@ -97,7 +98,7 @@ export namespace SqliteToolbar { export class SqliteToolbar extends DropdownToolbar { constructor( options: SqliteToolbar.Options, - fileOptions: SqliteOptions, + fileOptions: SqliteReadOptions, fileInfo: SqliteFileInfo, ) { super( @@ -106,7 +107,7 @@ export class SqliteToolbar extends DropdownToolbar { ); } - get fileOptions(): SqliteOptions { + get fileOptions(): SqliteReadOptions { return { table_name: this.selectNode.value, }; @@ -123,18 +124,23 @@ export interface ToolbarOptions { /** * Factory function to create the appropriate toolbar for a given file type. + * Type-safe overloads ensure the correct options and info types are used. */ -export function createToolbar( - fileType: FileType, +export function createToolbar( + fileType: T, options: ToolbarOptions, - fileOptions: FileOptions, - fileInfo: FileInfo, + fileOptions: FileReadOptionsFor, + fileInfo: FileInfoFor, ): Widget | null { switch (fileType) { case FileType.Csv: - return new CsvToolbar(options, fileOptions as CsvOptions, fileInfo as CsvFileInfo); + return new CsvToolbar(options, fileOptions as CsvReadOptions, fileInfo as CsvFileInfo); case FileType.Sqlite: - return new SqliteToolbar(options, fileOptions as SqliteOptions, fileInfo as SqliteFileInfo); + return new SqliteToolbar( + options, + fileOptions as SqliteReadOptions, + fileInfo as SqliteFileInfo, + ); default: return null; } diff --git a/src/widget.ts b/src/widget.ts index 388387c..f3f6c0e 100644 --- a/src/widget.ts +++ b/src/widget.ts @@ -12,10 +12,10 @@ import { Panel } from "@lumino/widgets"; import type { DocumentRegistry, IDocumentWidget } from "@jupyterlab/docregistry"; import type * as DataGridModule from "@lumino/datagrid"; -import { FileType } from "./filetypes"; +import { FileType } from "./file-types"; import { ArrowModel } from "./model"; import { createToolbar } from "./toolbar"; -import type { FileInfo, FileOptions } from "./file_options"; +import type { FileInfo, FileReadOptions } from "./file-options"; export namespace ArrowGridViewer { export interface Options { @@ -74,17 +74,17 @@ export class ArrowGridViewer extends Panel { return this.dataModel.fileInfo; } - get fileOptions(): Readonly { - return this.dataModel.fileOptions; + get fileReadOptions(): Readonly { + return this.dataModel.fileReadOptions; } - set fileOptions(fileOptions: FileOptions) { - this.dataModel.fileOptions = fileOptions; + set fileReadOptions(fileOptions: FileReadOptions) { + this.dataModel.fileReadOptions = fileOptions; } - updateFileOptions(fileOptionsUpdate: Partial) { - this.fileOptions = { - ...this.fileOptions, + updateFileReadOptions(fileOptionsUpdate: Partial) { + this.fileReadOptions = { + ...this.fileReadOptions, ...fileOptionsUpdate, }; } @@ -215,7 +215,7 @@ export class ArrowGridViewerFactory extends ABCWidgetFactory