Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions arbalister/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,20 @@ class Empty:


@dataclasses.dataclass(frozen=True, slots=True)
class SqliteOptions:
class SqliteReadOptions:
"""Query parameter for the Sqlite reader."""

table_name: str | None = None


@dataclasses.dataclass(frozen=True, slots=True)
class CsvOptions:
class CsvReadOptions:
"""Query parameter for the CSV reader."""

delimiter: str | None = ","


FileOptions = SqliteOptions | CsvOptions | Empty
FileReadOptions = SqliteReadOptions | CsvReadOptions | Empty


class BaseRouteHandler(jupyter_server.base.handlers.APIHandler):
Expand All @@ -58,21 +58,21 @@ def dataframe(self, path: str) -> dn.DataFrame:
"""
file = self.data_file(path)
file_format = ff.FileFormat.from_filename(file)
file_params = self.get_file_read_params(file_format)
file_params = self.get_file_options(file_format)
read_table = abw.get_table_reader(format=file_format)
return read_table(self.context, file, **dataclasses.asdict(file_params))

def get_query_params_as[T](self, dataclass_type: type[T]) -> T:
"""Extract query parameters into a dataclass type."""
return params.build_dataclass(dataclass_type, self.get_query_argument)

def get_file_read_params(self, file_format: ff.FileFormat) -> FileOptions:
def get_file_options(self, file_format: ff.FileFormat) -> FileReadOptions:
"""Read the parameters associated with the relevant file format."""
match file_format:
case ff.FileFormat.Sqlite:
return self.get_query_params_as(SqliteOptions)
return self.get_query_params_as(SqliteReadOptions)
case ff.FileFormat.Csv:
return self.get_query_params_as(CsvOptions)
return self.get_query_params_as(CsvReadOptions)
return Empty()


Expand Down Expand Up @@ -194,19 +194,19 @@ class CsvFileInfo:
delimiters: list[str] = dataclasses.field(default_factory=lambda: [",", ";", "\\t", "|", "#"])


FileInfo = SqliteFileInfo
FileInfo = SqliteFileInfo | CsvFileInfo


@dataclasses.dataclass(frozen=True, slots=True)
class FileInfoResponse[I, P]:
"""File-specific information and defaults returned in the file info route."""

info: I
read_params: P
default_options: P


CsvFileInfoResponse = FileInfoResponse[CsvFileInfo, CsvOptions]
SqliteFileInfoResponse = FileInfoResponse[SqliteFileInfo, SqliteOptions]
CsvFileInfoResponse = FileInfoResponse[CsvFileInfo, CsvReadOptions]
SqliteFileInfoResponse = FileInfoResponse[SqliteFileInfo, SqliteReadOptions]

NoFileInfoResponse = FileInfoResponse[Empty, Empty]

Expand All @@ -225,7 +225,7 @@ async def get(self, path: str) -> None:
info = CsvFileInfo()
csv_response = CsvFileInfoResponse(
info=info,
read_params=CsvOptions(delimiter=info.delimiters[0]),
default_options=CsvReadOptions(delimiter=info.delimiters[0]),
)
await self.finish(dataclasses.asdict(csv_response))
case ff.FileFormat.Sqlite:
Expand All @@ -235,11 +235,11 @@ async def get(self, path: str) -> None:

sqlite_response = SqliteFileInfoResponse(
info=SqliteFileInfo(table_names=table_names),
read_params=SqliteOptions(table_name=table_names[0]),
default_options=SqliteReadOptions(table_name=table_names[0]),
)
await self.finish(dataclasses.asdict(sqlite_response))
case _:
no_response = NoFileInfoResponse(info=Empty(), read_params=Empty())
no_response = NoFileInfoResponse(info=Empty(), default_options=Empty())
await self.finish(dataclasses.asdict(no_response))


Expand Down
30 changes: 16 additions & 14 deletions arbalister/tests/test_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,38 @@
params=[
(ff.FileFormat.Avro, arb.routes.Empty()),
(ff.FileFormat.Csv, arb.routes.Empty()),
(ff.FileFormat.Csv, arb.routes.CsvOptions(delimiter=";")),
(ff.FileFormat.Csv, arb.routes.CsvReadOptions(delimiter=";")),
(ff.FileFormat.Ipc, arb.routes.Empty()),
(ff.FileFormat.Orc, arb.routes.Empty()),
(ff.FileFormat.Parquet, arb.routes.Empty()),
(ff.FileFormat.Sqlite, arb.routes.Empty()),
(ff.FileFormat.Sqlite, arb.routes.SqliteOptions(table_name="dummy_table_2")),
(ff.FileFormat.Sqlite, arb.routes.SqliteReadOptions(table_name="dummy_table_2")),
],
ids=lambda f_p: f"{f_p[0].value}-{dataclasses.asdict(f_p[1])}",
scope="module",
)
def file_format_and_params(request: pytest.FixtureRequest) -> tuple[ff.FileFormat, arb.routes.FileOptions]:
def file_format_and_params(
request: pytest.FixtureRequest,
) -> tuple[ff.FileFormat, arb.routes.FileReadOptions]:
"""Parametrize the file format and file parameters used in the tests.

This is used to build test cases with a given set of parameters since each file format may be tested
with a different number of parameters.
"""
out: tuple[ff.FileFormat, arb.routes.FileOptions] = request.param
out: tuple[ff.FileFormat, arb.routes.FileReadOptions] = request.param
return out


@pytest.fixture(scope="module")
def file_format(file_format_and_params: tuple[ff.FileFormat, arb.routes.FileOptions]) -> ff.FileFormat:
def file_format(file_format_and_params: tuple[ff.FileFormat, arb.routes.FileReadOptions]) -> ff.FileFormat:
"""Extract the the file format fixture value used in the tests."""
return file_format_and_params[0]


@pytest.fixture(scope="module")
def file_params(
file_format_and_params: tuple[ff.FileFormat, arb.routes.FileOptions],
) -> arb.routes.FileOptions:
file_format_and_params: tuple[ff.FileFormat, arb.routes.FileReadOptions],
) -> arb.routes.FileReadOptions:
"""Extract the the file parameters fixture value used in the tests."""
return file_format_and_params[1]

Expand Down Expand Up @@ -82,7 +84,7 @@ def dummy_table_2(num_rows: int = 13) -> pa.Table:
@pytest.fixture(scope="module")
def full_table(file_params: ff.FileFormat, dummy_table_1: pa.Table, dummy_table_2: pa.Table) -> pa.Table:
"""Return the full table on which we are executed queries."""
if isinstance(file_params, arb.routes.SqliteOptions):
if isinstance(file_params, arb.routes.SqliteReadOptions):
return {
"dummy_table_1": dummy_table_1,
"dummy_table_2": dummy_table_2,
Expand All @@ -96,7 +98,7 @@ def table_file(
dummy_table_1: pa.Table,
dummy_table_2: pa.Table,
file_format: ff.FileFormat,
file_params: arb.routes.FileOptions,
file_params: arb.routes.FileReadOptions,
) -> pathlib.Path:
"""Write the dummy table to file."""
write_table = arb.arrow.get_table_writer(file_format)
Expand Down Expand Up @@ -162,7 +164,7 @@ async def test_ipc_route_limit(
full_table: pa.Table,
table_file: pathlib.Path,
ipc_params: arb.routes.IpcParams,
file_params: arb.routes.SqliteOptions,
file_params: arb.routes.FileReadOptions,
file_format: ff.FileFormat,
) -> None:
"""Test fetching a file returns the limited rows and columns in IPC."""
Expand Down Expand Up @@ -204,7 +206,7 @@ async def test_stats_route(
jp_fetch: JpFetch,
full_table: pa.Table,
table_file: pathlib.Path,
file_params: arb.routes.SqliteOptions,
file_params: arb.routes.FileReadOptions,
file_format: ff.FileFormat,
) -> None:
"""Test fetching a file returns the correct metadata in Json."""
Expand Down Expand Up @@ -242,15 +244,15 @@ async def test_file_info_route_sqlite(

payload = json.loads(response.body)
info = payload["info"]
read_params = payload["read_params"]
default_options = payload["default_options"]

match file_format:
case ff.FileFormat.Csv:
assert isinstance(info["delimiters"], list)
assert "," in info["delimiters"]
assert read_params["delimiter"] == info["delimiters"][0]
assert default_options["delimiter"] == info["delimiters"][0]
case ff.FileFormat.Sqlite:
assert isinstance(info["table_names"], list)
assert "dummy_table_1" in info["table_names"]
assert "dummy_table_2" in info["table_names"]
assert read_params["table_name"] == info["table_names"][0]
assert default_options["table_name"] == info["table_names"][0]
2 changes: 1 addition & 1 deletion pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ cmd = "biome check --write --error-on-warnings"
description = "Check JS/TS/CSS code with biome."

[tool.pixi.feature.test.tasks.check-typescript]
cmd = "npx --no tsc --noEmit"
cmd = "npx --no tsc --noEmit && echo Typescript OK"
description = "Check TS code with typescript."

[tool.pixi.feature.test.tasks.check-typos]
Expand Down
12 changes: 8 additions & 4 deletions src/__tests__/model.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import type * as Arrow from "apache-arrow";

import { ArrowModel } from "../model";
import { fetchStats, fetchTable } from "../requests";
import type { FileInfo, FileOptions } from "../file_options";
import type { FileInfo, FileReadOptions } from "../file-options";
import type * as Req from "../requests";

const MOCK_TABLE = tableFromArrays({
Expand Down Expand Up @@ -53,7 +53,11 @@ describe("ArrowModel", () => {
(fetchTable as jest.Mock).mockImplementation(fetchTableMocked);
(fetchStats as jest.Mock).mockImplementation(fetchStatsMocked);

const model = new ArrowModel({ path: "test/path.parquet" }, {} as FileOptions, {} as FileInfo);
const model = new ArrowModel(
{ path: "test/path.parquet" },
{} as FileReadOptions,
{} as FileInfo,
);

it("should initialize data", async () => {
await model.ready;
Expand All @@ -73,13 +77,13 @@ describe("ArrowModel", () => {
});

it("should reinitialize when fileOptions is set", async () => {
const model2 = new ArrowModel({ path: "test/data.csv" }, {} as FileOptions, {} as FileInfo);
const model2 = new ArrowModel({ path: "test/data.csv" }, {} as FileReadOptions, {} as FileInfo);
await model2.ready;

const initialStatsCallCount = (fetchStats as jest.Mock).mock.calls.length;
const initialTableCallCount = (fetchTable as jest.Mock).mock.calls.length;

model2.fileOptions = { delimiter: ";" } as FileOptions;
model2.fileReadOptions = { delimiter: ";" } as FileReadOptions;
await model2.ready;

expect(fetchStats).toHaveBeenCalledTimes(initialStatsCallCount + 1);
Expand Down
56 changes: 56 additions & 0 deletions src/file-options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import { FileType } from "./file-types";

/**
* Read options for CSV files.
*
* Counterpart of the `CsvReadOptions` dataclass in `arbalister/routes.py`;
* the field name is kept identical to the server-side one.
*/
export interface CsvReadOptions {
// Column delimiter; the file info route defaults it to the first entry of `CsvFileInfo.delimiters`.
delimiter: string;
}

/**
* Read options for Sqlite files.
*
* Counterpart of the `SqliteReadOptions` dataclass in `arbalister/routes.py`;
* the snake_case field name is kept identical to the server-side one.
*/
export interface SqliteReadOptions {
// Table name; the file info route defaults it to the first entry of `SqliteFileInfo.table_names`.
table_name: string;
}

/**
* File-specific information for CSV files, as carried in the `info` field of
* the file info route response.
*/
export interface CsvFileInfo {
// Delimiters listed by the server; its first entry is used as the default read option.
delimiters: string[];
}

/**
* File-specific information for Sqlite files, as carried in the `info` field of
* the file info route response.
*/
export interface SqliteFileInfo {
// Table names listed by the server; its first entry is used as the default read option.
table_names: string[];
}

/**
* Central registry mapping FileType to its related types.
* This ensures type safety when working with file-type-specific data.
*
* Each entry pairs a file type with its `readOptions` and `info` shapes.
* Only file types with specific options are listed; the `...For` helper
* types resolve to `never` for any FileType without an entry here.
*/
interface FileTypeRegistry {
[FileType.Csv]: {
readOptions: CsvReadOptions;
info: CsvFileInfo;
};
[FileType.Sqlite]: {
readOptions: SqliteReadOptions;
info: SqliteFileInfo;
};
}

/**
* Extract the read options type for a specific FileType.
*
* Resolves to `never` when the FileType has no entry in FileTypeRegistry.
*/
export type FileReadOptionsFor<T extends FileType> = T extends keyof FileTypeRegistry
? FileTypeRegistry[T]["readOptions"]
: never;

/**
* Extract the info type for a specific FileType.
*
* Resolves to `never` when the FileType has no entry in FileTypeRegistry.
*/
export type FileInfoFor<T extends FileType> = T extends keyof FileTypeRegistry
? FileTypeRegistry[T]["info"]
: never;

/**
* Union of the read options of every file type in FileTypeRegistry.
*/
export type FileReadOptions = FileTypeRegistry[keyof FileTypeRegistry]["readOptions"];

/**
* Union of the file info of every file type in FileTypeRegistry.
*/
export type FileInfo = FileTypeRegistry[keyof FileTypeRegistry]["info"];
File renamed without changes.
19 changes: 0 additions & 19 deletions src/file_options.ts

This file was deleted.

2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import type * as services from "@jupyterlab/services";
import type { Contents } from "@jupyterlab/services";
import type { DataGrid } from "@lumino/datagrid";

import { ensureFileType, FileType, updateIcon } from "./filetypes";
import { ensureFileType, FileType, updateIcon } from "./file-types";
import { ArrowGridViewerFactory } from "./widget";
import type { ArrowGridViewer, ITextRenderConfig } from "./widget";

Expand Down
12 changes: 6 additions & 6 deletions src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import type * as Arrow from "apache-arrow";

import { PairMap } from "./collection";
import { fetchFileInfo, fetchStats, fetchTable } from "./requests";
import type { FileInfo, FileOptions } from "./file_options";
import type { FileInfo, FileReadOptions } from "./file-options";

export namespace ArrowModel {
export interface LoadingOptions {
Expand All @@ -17,15 +17,15 @@ export namespace ArrowModel {

export class ArrowModel extends DataModel {
static async fromRemoteFileInfo(loadingOptions: ArrowModel.LoadingOptions) {
const { info: fileInfo, read_params: fileOptions } = await fetchFileInfo({
const { info: fileInfo, default_options: fileOptions } = await fetchFileInfo({
path: loadingOptions.path,
});
return new ArrowModel(loadingOptions, fileOptions, fileInfo);
}

constructor(
loadingOptions: ArrowModel.LoadingOptions,
fileOptions: FileOptions,
fileOptions: FileReadOptions,
fileInfo: FileInfo,
) {
super();
Expand Down Expand Up @@ -59,11 +59,11 @@ export class ArrowModel extends DataModel {
return this._fileInfo;
}

get fileOptions(): Readonly<FileOptions> {
get fileReadOptions(): Readonly<FileReadOptions> {
return this._fileOptions;
}

set fileOptions(fileOptions: FileOptions) {
set fileReadOptions(fileOptions: FileReadOptions) {
this._fileOptions = fileOptions;
this._ready = this.initialize().then(() => {
this.emitChanged({ type: "model-reset" });
Expand Down Expand Up @@ -196,7 +196,7 @@ export class ArrowModel extends DataModel {

private readonly _loadingParams: Required<ArrowModel.LoadingOptions>;
private readonly _fileInfo: FileInfo;
private _fileOptions: FileOptions;
private _fileOptions: FileReadOptions;

private _numRows: number = 0;
private _numCols: number = 0;
Expand Down
Loading
Loading