Skip to content

Commit

Permalink
Merge pull request #5 from TheNeuralBit/dictionary-vector-tests
Browse files Browse the repository at this point in the history
Add dictionary vector unit tests
  • Loading branch information
trxcllnt authored Feb 28, 2018
2 parents b0a0c08 + 3fb9a26 commit ccecf55
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 6 deletions.
3 changes: 3 additions & 0 deletions js/src/vector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,9 @@ export class DictionaryVector<T extends DataType = DataType> extends Vector<Dict
public readonly dictionary: Vector<T>;
constructor(data: Data<Dictionary<T>>, view: View<Dictionary<T>> = new DictionaryView<T>(data.dictionary, new IntVector(data.indices))) {
super(data as Data<any>, view);
if (view instanceof ValidityView) {
view = (view as any).view;
}
if (data instanceof DictionaryData && view instanceof DictionaryView) {
this.indices = view.indices;
this.dictionary = data.dictionary;
Expand Down
61 changes: 55 additions & 6 deletions js/test/unit/vector-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@

import { TextEncoder } from 'text-encoding-utf-8';
import Arrow from '../Arrow';
import { type, TypedArray, TypedArrayConstructor } from '../../src/Arrow';
import { type, TypedArray, TypedArrayConstructor, Vector } from '../../src/Arrow';
import { packBools } from '../../src/util/bit'

const utf8Encoder = new TextEncoder('utf-8');

const { BoolData, FlatData, FlatListData } = Arrow.data;
const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow.vector;
const { BoolData, FlatData, FlatListData, DictionaryData } = Arrow.data;
const { IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } = Arrow.vector;
const {
Utf8, Bool,
Dictionary, Utf8, Bool,
Float16, Float32, Float64,
Int8, Int16, Int32, Int64,
Uint8, Uint16, Uint32, Uint64,
Expand Down Expand Up @@ -310,6 +311,54 @@ describe(`Utf8Vector`, () => {
let offset = 0;
const offsets = Uint32Array.of(0, ...values.map((d) => { offset += d.length; return offset; }));
const vector = new Utf8Vector(new FlatListData(new Utf8(), n, null, offsets, utf8Encoder.encode(values.join(''))));
basicVectorTests(vector, values, ['abc', '123']);
describe(`sliced`, () => {
basicVectorTests(vector.slice(1,3), values.slice(1,3), ['foo', 'abc']);
});
});

/**
 * Unit tests for `DictionaryVector`.
 *
 * Builds a three-entry Utf8 dictionary and 50 random indices into it, then
 * runs `basicVectorTests` against a vector whose index data carries no nulls
 * and against one whose index data carries a validity bitmap, each both
 * whole and sliced to [10, 20).
 */
describe(`DictionaryVector`, () => {
    const dictionary = ['foo', 'bar', 'baz'];
    const extras = ['abc', '123']; // values to search for that should NOT be found

    // Utf8 offsets buffer: a leading 0 followed by the running total of the
    // string lengths, so entry i spans offsets[i]..offsets[i + 1].
    let offset = 0;
    const offsets = Uint32Array.of(0, ...dictionary.map((d) => { offset += d.length; return offset; }));
    const dictionaryVec = new Utf8Vector(new FlatListData(new Utf8(), dictionary.length, null, offsets, utf8Encoder.encode(dictionary.join(''))));

    // 50 random integer indices in [0, 3), one per logical row.
    // NOTE(review): Math.random is unseeded, so the fixture differs on every
    // run; a failure may not reproduce unless the generated data is logged.
    const indices = Array.from({ length: 50 }, () => Math.random() * 3 | 0);

    describe(`index with nullCount == 0`, () => {
        // An empty bitmap is passed and no null count is supplied.
        const indicesData = new FlatData(new Int32(), indices.length, new Uint8Array(0), indices);

        // Expected logical values: every index resolved through the dictionary.
        const values = indices.map((d) => dictionary[d]);
        const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionaryVec.type, indicesData.type), dictionaryVec, indicesData));

        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
        });
    });

    describe(`index with nullCount > 0`, () => {
        // Each slot is marked valid with probability ~0.8.
        const validity = Array.from({ length: indices.length }, () => Math.random() > 0.2);
        // The last argument is the number of `false` entries in `validity`.
        const nullCount = validity.reduce((acc, d) => acc + (d ? 0 : 1), 0);
        const indicesData = new FlatData(new Int32(), indices.length, packBools(validity), indices, 0, nullCount);

        // Expected logical values: the dictionary entry where valid, null otherwise.
        const values = indices.map((d, i) => validity[i] ? dictionary[d] : null);
        const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionaryVec.type, indicesData.type), dictionaryVec, indicesData));

        // Use the shared `extras` constant, consistent with the other suites.
        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
        });
    });
});

// Creates some basic tests for the given vector.
// Verifies that:
// - `get` and the native iterator return the same data as `values`
// - `indexOf` returns the same index as `values.indexOf` for every entry of `values` and of `extras`
function basicVectorTests(vector: Vector, values: any[], extras: any[]) {
const n = values.length;

test(`gets expected values`, () => {
let i = -1;
while (++i < n) {
Expand All @@ -325,14 +374,14 @@ describe(`Utf8Vector`, () => {
}
});
test(`indexOf returns expected values`, () => {
let testValues = values.concat(['abc', '12345']);
let testValues = values.concat(extras);

for (const value of testValues) {
const expected = values.indexOf(value);
expect(vector.indexOf(value)).toEqual(expected);
}
});
});
}

function toMap<T>(entries: Record<string, T>, keys: string[]) {
return keys.reduce((map, key) => {
Expand Down

0 comments on commit ccecf55

Please sign in to comment.