Skip to content

Commit

Permalink
lazily allocate table and recordbatch columns, support NestedView's g…
Browse files Browse the repository at this point in the history
…etChildAt(i) method in ChunkedView
  • Loading branch information
trxcllnt committed Jan 24, 2018
1 parent 40b3638 commit 54d4f5b
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 45 deletions.
2 changes: 1 addition & 1 deletion js/src/predicate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ export class Col<T= any> extends Value<T> {
}
if (this.colidx < 0) { throw new Error(`Failed to bind Col "${this.name}"`); }
}
this.vector = batch.columns[this.colidx];
this.vector = batch.getChildAt(this.colidx);
return this.vector.get.bind(this.vector);
}

Expand Down
9 changes: 2 additions & 7 deletions js/src/recordbatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import { Schema, Struct } from './type';
import { flatbuffers } from 'flatbuffers';
import { View, Vector, StructVector } from './vector';
import { Data, NestedData, ChunkedData } from './data';
import { Data, NestedData } from './data';

import Long = flatbuffers.Long;

Expand All @@ -32,7 +32,6 @@ export class RecordBatch extends StructVector {
public readonly schema: Schema;
public readonly length: number;
public readonly numCols: number;
public readonly columns: Vector<any>[];
constructor(schema: Schema, data: Data<Struct>, view: View<Struct>);
constructor(schema: Schema, numRows: Long | number, cols: Data<any> | Vector[]);
constructor(...args: any[]) {
Expand All @@ -42,9 +41,6 @@ export class RecordBatch extends StructVector {
this.schema = args[0];
this.length = data.length;
this.numCols = this.schema.fields.length;
this.columns = data instanceof ChunkedData
? data.childVectors
: data.childData.map((col) => Vector.create(col));
} else {
const [schema, numRows, cols] = args;
const columns: Vector<any>[] = new Array(cols.length);
Expand All @@ -59,7 +55,6 @@ export class RecordBatch extends StructVector {
}
super(new NestedData(new Struct(schema.fields), numRows, null, columnsData));
this.schema = schema;
this.columns = columns;
this.length = numRows;
this.numCols = schema.fields.length;
}
Expand All @@ -72,7 +67,7 @@ export class RecordBatch extends StructVector {
const namesToKeep = columnNames.reduce((xs, x) => (xs[x] = true) && xs, Object.create(null));
return new RecordBatch(
this.schema.select(...columnNames), this.length,
this.columns.filter((_, index) => namesToKeep[fields[index].name])
this.childData.filter((_, index) => namesToKeep[fields[index].name])
);
}
}
13 changes: 6 additions & 7 deletions js/src/table.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ export class Table implements DataFrame {
// List of inner RecordBatches
public readonly batches: RecordBatch[];
// List of inner Vectors, possibly spanning batches
public readonly columns: Vector<any>[];
protected readonly _columns: Vector<any>[] = [];
// Union of all inner RecordBatches into one RecordBatch, possibly chunked.
// If the Table has just one inner RecordBatch, this points to that.
// If the Table has multiple inner RecordBatches, then this is a Chunked view
Expand All @@ -94,10 +94,7 @@ export class Table implements DataFrame {
this.schema = schema;
this.batches = batches;
this.batchesUnion = batches.reduce((union, batch) => union.concat(batch));
this.columns = batches.slice(1).reduce((columns, batch) =>
columns.map((col, idx) => col.concat(batch.columns[idx])),
batches[0].columns
);
// this.columns = schema.fields.map((_, i) => this.batchesUnion.getChildAt(i));
this.length = this.batchesUnion.length;
this.numCols = this.batchesUnion.numCols;
}
Expand All @@ -108,7 +105,8 @@ export class Table implements DataFrame {
return this.getColumnAt(this.getColumnIndex(name));
}
public getColumnAt(index: number) {
return this.columns[index];
return this._columns[index] || (
this._columns[index] = this.batchesUnion.getChildAt(index));
}
public getColumnIndex(name: string) {
return this.schema.fields.findIndex((f) => f.name === name);
Expand Down Expand Up @@ -265,7 +263,8 @@ export class CountByResult extends Table implements DataFrame {
));
}
public toJSON(): Object {
const [values, counts] = this.columns;
const values = this.getColumnAt(0);
const counts = this.getColumnAt(1);
const result = {} as { [k: string]: number | null };
for (let i = -1; ++i < this.length;) {
result[values.get(i)] = counts.get(i);
Expand Down
7 changes: 5 additions & 2 deletions js/src/vector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ export class Vector<T extends DataType = any> implements VectorLike, View<T>, Vi
public static create<T extends DataType>(data: Data<T>): Vector<T> {
return createVector(data);
}
public static concat<T extends DataType>(...sources: Vector<T>[]): Vector<T> {
return sources.length === 1 ? sources[0] : sources.reduce((a, b) => a.concat(b));
}
public type: T;
public length: number;
public readonly data: Data<T>;
Expand Down Expand Up @@ -84,7 +87,7 @@ export class Vector<T extends DataType = any> implements VectorLike, View<T>, Vi
const { view } = this;
const vecs = !(view instanceof ChunkedView)
? [this, ...others]
: [...view.chunks, ...others];
: [...view.childVectors, ...others];
const offsets = ChunkedData.computeOffsets(vecs);
const chunksLength = offsets[offsets.length - 1];
const chunkedData = new ChunkedData(this.type, chunksLength, vecs, 0, -1, offsets);
Expand Down Expand Up @@ -377,7 +380,7 @@ export class DictionaryVector<T extends DataType = DataType> extends Vector<Dict
this.indicies = view.indicies;
this.dictionary = data.dictionary;
} else if (data instanceof ChunkedData && view instanceof ChunkedView) {
const chunks = view.chunks as DictionaryVector<T>[];
const chunks = view.childVectors as DictionaryVector<T>[];
// Assume the last chunk's dictionary data is the most up-to-date,
// including data from DictionaryBatches that were marked as deltas
this.dictionary = chunks[chunks.length - 1].dictionary;
Expand Down
32 changes: 19 additions & 13 deletions js/src/vector/chunked.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,37 @@
// under the License.

import { ChunkedData } from '../data';
import { View, Vector } from '../vector';
import { View, Vector, NestedVector } from '../vector';
import { DataType, TypedArray, IterableArrayLike } from '../type';

export class ChunkedView<T extends DataType> implements View<T> {
public chunks: Vector<T>[];
public offsets: Uint32Array;
public childVectors: Vector<T>[];
public childOffsets: Uint32Array;
protected _childColumns: Vector<any>[];
constructor(data: ChunkedData<T>) {
this.chunks = data.childVectors;
this.offsets = data.childOffsets;
this.childVectors = data.childVectors;
this.childOffsets = data.childOffsets;
}
public clone(data: ChunkedData<T>): this {
return new ChunkedView(data) as this;
}
public *[Symbol.iterator](): IterableIterator<T['TValue'] | null> {
for (const vector of this.chunks) {
for (const vector of this.childVectors) {
yield* vector;
}
}
public getChildAt<R extends DataType = DataType>(index: number) {
return (this._childColumns || (this._childColumns = []))[index] || (
this._childColumns[index] = Vector.concat<R>(
...(<any> this.childVectors as NestedVector<any>[]).map((v) => v.getChildAt(index))));
}
public isValid(index: number): boolean {
// binary search to find the child vector and value index offset (inlined for speed)
let offsets = this.offsets, pos = 0;
let offsets = this.childOffsets, pos = 0;
let lhs = 0, mid = 0, rhs = offsets.length - 1;
while (index < offsets[rhs] && index >= (pos = offsets[lhs])) {
if (lhs + 1 === rhs) {
return this.chunks[lhs].isValid(index - pos);
return this.childVectors[lhs].isValid(index - pos);
}
mid = lhs + ((rhs - lhs) / 2) | 0;
index >= offsets[mid] ? (lhs = mid) : (rhs = mid);
Expand All @@ -49,11 +55,11 @@ export class ChunkedView<T extends DataType> implements View<T> {
}
public get(index: number): T['TValue'] | null {
// binary search to find the child vector and value index offset (inlined for speed)
let offsets = this.offsets, pos = 0;
let offsets = this.childOffsets, pos = 0;
let lhs = 0, mid = 0, rhs = offsets.length - 1;
while (index < offsets[rhs] && index >= (pos = offsets[lhs])) {
if (lhs + 1 === rhs) {
return this.chunks[lhs].get(index - pos);
return this.childVectors[lhs].get(index - pos);
}
mid = lhs + ((rhs - lhs) / 2) | 0;
index >= offsets[mid] ? (lhs = mid) : (rhs = mid);
Expand All @@ -62,18 +68,18 @@ export class ChunkedView<T extends DataType> implements View<T> {
}
public set(index: number, value: T['TValue'] | null): void {
// binary search to find the child vector and value index offset (inlined for speed)
let offsets = this.offsets, pos = 0;
let offsets = this.childOffsets, pos = 0;
let lhs = 0, mid = 0, rhs = offsets.length - 1;
while (index < offsets[rhs] && index >= (pos = offsets[lhs])) {
if (lhs + 1 === rhs) {
return this.chunks[lhs].set(index - pos, value);
return this.childVectors[lhs].set(index - pos, value);
}
mid = lhs + ((rhs - lhs) / 2) | 0;
index >= offsets[mid] ? (lhs = mid) : (rhs = mid);
}
}
public toArray(): IterableArrayLike<T['TValue'] | null> {
const chunks = this.chunks;
const chunks = this.childVectors;
const numChunks = chunks.length;
if (numChunks === 1) {
return chunks[0].toArray();
Expand Down
18 changes: 9 additions & 9 deletions js/src/vector/nested.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,23 @@
// under the License.

import { Data } from '../data';
import { View, Vector } from '../vector';
import { IterableArrayLike } from '../type';
import { View, Vector, createVector } from '../vector';
import { DataType, NestedType, DenseUnion, SparseUnion, Struct, Map_ } from '../type';

export abstract class NestedView<T extends NestedType> implements View<T> {
public length: number;
public numChildren: number;
public childData: Data<any>[];
protected children: Vector<any>[];
protected _childColumns: Vector<any>[];
constructor(data: Data<T>, children?: Vector<any>[]) {
this.length = data.length;
this.childData = data.childData;
this.numChildren = data.childData.length;
this.children = children || new Array(this.numChildren);
this._childColumns = children || new Array(this.numChildren);
}
public clone(data: Data<T>): this {
return new (<any> this.constructor)(data, this.children) as this;
return new (<any> this.constructor)(data, this._childColumns) as this;
}
public isValid(): boolean {
return true;
Expand All @@ -53,8 +53,8 @@ export abstract class NestedView<T extends NestedType> implements View<T> {
protected abstract getNested(self: NestedView<T>, index: number): T['TValue'];
protected abstract setNested(self: NestedView<T>, index: number, value: T['TValue']): void;
public getChildAt<R extends DataType = DataType>(index: number) {
return this.children[index] || (
this.children[index] = createVector<R>(this.childData[index]));
return this._childColumns[index] || (
this._childColumns[index] = Vector.create<R>(this.childData[index]));
}
public *[Symbol.iterator](): IterableIterator<T['TValue']> {
const get = this.getNested;
Expand Down Expand Up @@ -120,7 +120,7 @@ export class DenseUnionView extends UnionView<DenseUnion> {

export class StructView extends NestedView<Struct> {
protected getNested(self: StructView, index: number) {
return new RowView(self as any, self.children, index);
return new RowView(self as any, self._childColumns, index);
}
protected setNested(self: StructView, index: number, value: any): void {
let idx = -1, len = self.numChildren;
Expand All @@ -140,7 +140,7 @@ export class MapView extends NestedView<Map_> {
(xs[x.name] = i) && xs || xs, Object.create(null));
}
protected getNested(self: MapView, index: number) {
return new MapRowView(self as any, self.children, index);
return new MapRowView(self as any, self._childColumns, index);
}
protected setNested(self: MapView, index: number, value: { [k: string]: any }): void {
const typeIds = self.typeIds as any;
Expand All @@ -160,7 +160,7 @@ export class RowView extends UnionView<SparseUnion> {
this.length = data.numChildren;
}
public clone(data: Data<SparseUnion> & NestedView<any>): this {
return new (<any> this.constructor)(data, this.children, this.rowIndex) as this;
return new (<any> this.constructor)(data, this._childColumns, this.rowIndex) as this;
}
protected getChildValue(self: RowView, index: number, _typeIds: any, _valueOffsets?: any): any | null {
const child = self.getChildAt(index);
Expand Down
8 changes: 4 additions & 4 deletions js/test/integration/validate-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ function testReaderIntegration(jsonData: any, arrowBuffer: Uint8Array) {
expect(jsonRecordBatch.length).toEqual(binaryRecordBatch.length);
expect(jsonRecordBatch.numCols).toEqual(binaryRecordBatch.numCols);
for (let i = -1, n = jsonRecordBatch.numCols; ++i < n;) {
(jsonRecordBatch.columns[i] as any).name = jsonRecordBatch.schema.fields[i].name;
(expect(jsonRecordBatch.columns[i]) as any).toEqualVector(binaryRecordBatch.columns[i]);
(jsonRecordBatch.getChildAt(i) as any).name = jsonRecordBatch.schema.fields[i].name;
(expect(jsonRecordBatch.getChildAt(i)) as any).toEqualVector(binaryRecordBatch.getChildAt(i));
}
}
});
Expand All @@ -147,8 +147,8 @@ function testTableFromBuffersIntegration(jsonData: any, arrowBuffer: Uint8Array)
expect(jsonTable.length).toEqual(binaryTable.length);
expect(jsonTable.numCols).toEqual(binaryTable.numCols);
for (let i = -1, n = jsonTable.numCols; ++i < n;) {
(jsonTable.columns[i] as any).name = jsonTable.schema.fields[i].name;
(expect(jsonTable.columns[i]) as any).toEqualVector(binaryTable.columns[i]);
(jsonTable.getColumnAt(i) as any).name = jsonTable.schema.fields[i].name;
(expect(jsonTable.getColumnAt(i)) as any).toEqualVector(binaryTable.getColumnAt(i));
}
});
}
6 changes: 4 additions & 2 deletions js/test/unit/table-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ describe(`Table`, () => {
test(`scans expected values`, () => {
let expected_idx = 0;
table.scan((idx, batch) => {
expect(batch.columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i));
expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
});
});
test(`count() returns the correct length`, () => {
Expand Down Expand Up @@ -348,7 +349,8 @@ describe(`Table`, () => {
test(`scans expected values`, () => {
let expected_idx = 0;
table.scan((idx, batch) => {
expect(batch.columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i));
expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
});
});
test(`count() returns the correct length`, () => {
Expand Down

0 comments on commit 54d4f5b

Please sign in to comment.