Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arbalister/file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ def from_filename(cls, file: pathlib.Path | str) -> Self:
match file_type:
case "ipc" | "feather":
return cls.Ipc
case "sqlite3" | "db" | ".db3", ".s3db", ".sl3":
case "sqlite3" | "db" | "db3" | "s3db" | "sl3":
return cls.Sqlite
raise ValueError(f"Unknown file type {file_type}")
17 changes: 16 additions & 1 deletion src/__tests__/model.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type * as Arrow from "apache-arrow";

import { ArrowModel } from "../model";
import { fetchStats, fetchTable } from "../requests";
import type { FileInfo, FileOptions } from "../file_options";
import type * as Req from "../requests";

const MOCK_TABLE = tableFromArrays({
Expand Down Expand Up @@ -52,7 +53,7 @@ describe("ArrowModel", () => {
(fetchTable as jest.Mock).mockImplementation(fetchTableMocked);
(fetchStats as jest.Mock).mockImplementation(fetchStatsMocked);

const model = new ArrowModel({ path: "test/path.parquet" }, {});
const model = new ArrowModel({ path: "test/path.parquet" }, {} as FileOptions, {} as FileInfo);

it("should initialize data", async () => {
await model.ready;
Expand All @@ -70,4 +71,18 @@ describe("ArrowModel", () => {
// First chunk is initialized
expect(model.data("body", 0, 0)).toEqual(MOCK_TABLE.getChildAt(0)?.get(0).toString());
});

it("should reinitialize when fileOptions is set", async () => {
  // Use a separate instance so the `model` shared by the other tests is untouched.
  const csvModel = new ArrowModel({ path: "test/data.csv" }, {} as FileOptions, {} as FileInfo);
  await csvModel.ready;

  // Record the call counts reached after the initial load; the expectations
  // below are relative to this baseline rather than absolute.
  const statsMock = fetchStats as jest.Mock;
  const tableMock = fetchTable as jest.Mock;
  const statsCallsBefore = statsMock.mock.calls.length;
  const tableCallsBefore = tableMock.mock.calls.length;

  // Assigning new options must trigger exactly one more stats and table fetch.
  csvModel.fileOptions = { delimiter: ";" } as FileOptions;
  await csvModel.ready;

  expect(fetchStats).toHaveBeenCalledTimes(statsCallsBefore + 1);
  expect(fetchTable).toHaveBeenCalledTimes(tableCallsBefore + 1);
});
});
20 changes: 15 additions & 5 deletions src/file_options.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
export interface CsvOptions {
delimiter?: string;
delimiter: string;
}

export const DEFAULT_CSV_OPTIONS: Required<CsvOptions> = {
delimiter: ",",
};
/** Read options that apply to SQLite files. */
export interface SqliteOptions {
// NOTE(review): presumably the name of the table to read from the database — confirm against the server API.
table_name: string;
}

/** Read options for a file: either the CSV or the SQLite variant. */
export type FileOptions = CsvOptions | SqliteOptions;

/** File information reported for SQLite files. */
export interface SqliteFileInfo {
// NOTE(review): presumably the tables available in the database file — confirm against the server API.
table_names: string[];
}

/** File information reported for CSV files. */
export interface CsvFileInfo {
// NOTE(review): presumably the delimiters a user may choose from — confirm against the server API.
delimiters: string[];
}

export type FileOptions = CsvOptions;
export type FileInfo = SqliteFileInfo | CsvFileInfo;
39 changes: 34 additions & 5 deletions src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import { DataModel } from "@lumino/datagrid";
import type * as Arrow from "apache-arrow";

import { PairMap } from "./collection";
import { fetchStats, fetchTable } from "./requests";
import type { FileOptions } from "./file_options";
import { fetchFileInfo, fetchStats, fetchTable } from "./requests";
import type { FileInfo, FileOptions } from "./file_options";

export namespace ArrowModel {
export interface LoadingOptions {
Expand All @@ -16,7 +16,18 @@ export namespace ArrowModel {
}

export class ArrowModel extends DataModel {
constructor(loadingOptions: ArrowModel.LoadingOptions, fileOptions: FileOptions) {
/**
 * Build a model after asking the server for information about the file.
 *
 * Calls `fetchFileInfo` with `loadingOptions.path`, then constructs the model
 * using the response's `read_params` as file options and its `info` as file info.
 *
 * @param loadingOptions - Loading options; `path` identifies the file to query.
 * @returns A promise resolving to the newly constructed model.
 */
static async fromRemoteFileInfo(loadingOptions: ArrowModel.LoadingOptions) {
  const remote = await fetchFileInfo({ path: loadingOptions.path });
  return new ArrowModel(loadingOptions, remote.read_params, remote.info);
}

constructor(
loadingOptions: ArrowModel.LoadingOptions,
fileOptions: FileOptions,
fileInfo: FileInfo,
) {
super();

this._loadingParams = {
Expand All @@ -26,6 +37,7 @@ export class ArrowModel extends DataModel {
...loadingOptions,
};
this._fileOptions = fileOptions;
this._fileInfo = fileInfo;

this._ready = this.initialize();
}
Expand All @@ -37,9 +49,25 @@ export class ArrowModel extends DataModel {
]);

this._schema = stats.schema;
this._chunks.set([0, 0], chunk00);
this._numCols = stats.num_cols;
this._numRows = stats.num_rows;
this._chunks = new PairMap();
this._chunks.set([0, 0], chunk00);
}

/** The file information this model was constructed with, exposed as read-only. */
get fileInfo(): Readonly<FileInfo> {
return this._fileInfo;
}

/** The file options currently held by the model, exposed as read-only. */
get fileOptions(): Readonly<FileOptions> {
return this._fileOptions;
}

/**
 * Replace the file options and re-run initialization.
 *
 * `ready` is replaced by a promise that settles once `initialize()` has
 * finished and a `"model-reset"` change has been emitted.
 *
 * Each assignment starts a new `initialize()` without waiting for one that
 * may already be in flight.
 */
set fileOptions(fileOptions: FileOptions) {
  this._fileOptions = fileOptions;

  const announceReset = (): void => {
    this.emitChanged({ type: "model-reset" });
  };
  this._ready = this.initialize().then(announceReset);
}

get ready(): Promise<void> {
Expand Down Expand Up @@ -167,7 +195,8 @@ export class ArrowModel extends DataModel {
}

private readonly _loadingParams: Required<ArrowModel.LoadingOptions>;
private readonly _fileOptions: FileOptions;
private readonly _fileInfo: FileInfo;
private _fileOptions: FileOptions;

private _numRows: number = 0;
private _numCols: number = 0;
Expand Down
26 changes: 23 additions & 3 deletions src/requests.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
import { tableFromIPC } from "apache-arrow";
import type * as Arrow from "apache-arrow";

import type { FileOptions } from "./file_options";
import type { FileInfo, FileOptions } from "./file_options";

/** Parameters for `fetchFileInfo`. */
export interface FileInfoOptions {
// Appended to the `/file/info/` endpoint URL to identify the file.
path: string;
}

/** JSON body returned by the `/file/info/` endpoint, as consumed by `fetchFileInfo`. */
export interface FileInfoResponse {
// Format-specific information about the file.
info: FileInfo;
// NOTE(review): presumably the read options the server suggests as defaults — confirm against the server API.
read_params: FileOptions;
}

/**
 * Fetch the file information and read parameters the server reports for a file.
 *
 * Issues a request to `/file/info/<path>` and parses the JSON response body.
 *
 * @param params - Identifies the file; `params.path` is appended to the endpoint URL.
 * @returns The parsed response body.
 * @throws Error when the server responds with a non-2xx status.
 */
export async function fetchFileInfo(params: Readonly<FileInfoOptions>): Promise<FileInfoResponse> {
  const response = await fetch(`/file/info/${params.path}`);
  if (!response.ok) {
    // Without this guard an error payload would either be returned as if it
    // were a valid FileInfoResponse or fail with an opaque JSON parse error.
    throw new Error(
      `Failed to fetch file info for "${params.path}": ${response.status} ${response.statusText}`,
    );
  }
  // The body is trusted to match FileInfoResponse; no runtime validation is done here.
  const data: FileInfoResponse = await response.json();
  return data;
}

export interface StatsOptions {
path: string;
Expand Down Expand Up @@ -35,14 +50,18 @@ type OptionalizeUnion<T> = {
export async function fetchStats(
params: Readonly<StatsOptions & FileOptions>,
): Promise<StatsResponse> {
const queryKeys = ["path", "delimiter"] as const;
const queryKeys = ["path", "delimiter", "table_name"] as const;
const queryKeyMap: Record<string, string> = {
tableName: "table_name",
};

const query = new URLSearchParams();

for (const key of queryKeys) {
const value = (params as Readonly<TableOptions> & OptionalizeUnion<FileOptions>)[key];
if (value !== undefined && value != null) {
query.set(key, value.toString());
const queryKey = queryKeyMap[key] || key;
query.set(queryKey, value.toString());
}
}

Expand Down Expand Up @@ -95,6 +114,7 @@ export async function fetchTable(
"col_chunk_size",
"col_chunk",
"delimiter",
"table_name",
] as const;

const query = new URLSearchParams();
Expand Down
Loading
Loading