diff --git a/js/perf/index.js b/js/perf/index.js
index 74dbd872d8a6e..b5789e8b34c07 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -16,10 +16,10 @@
 // under the License.
 
 // Use the ES5 UMD target as perf baseline
-// const { DataFrame, Table, readVectors } = require('../targets/es5/umd');
-// const { DataFrame, Table, readVectors } = require('../targets/es5/cjs');
-// const { DataFrame, Table, readVectors } = require('../targets/es2015/umd');
-const { DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
+// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
+// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
+// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
+const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
 
 const config = require('./config');
 const Benchmark = require('benchmark');
@@ -280,9 +280,9 @@ function createDataFrameScanCountTest(table, column, test, value) {
 function createDataFrameFilterCountTest(table, column, test, value) {
     let df = DataFrame.from(table);
     if (test == 'gteq') {
-        df = df.filter((idx, cols)=>cols[column].get(idx) >= value);
+        df = df.filter(col(table.columns[column].name).gteq(value));
     } else if (test == 'eq') {
-        df = df.filter((idx, cols)=>cols[column].get(idx) == value);
+        df = df.filter(col(table.columns[column].name).eq(value));
     } else {
         throw new Error(`Unrecognized test "${test}"`);
     }
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index a52deeb4992c0..ce7235b8b13d4 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -46,6 +46,7 @@ import {
 } from './vector/numeric';
 
 import { DataFrame } from './dataframe/dataframe';
+import { lit, col } from './dataframe/predicate';
 
 // closure compiler always erases static method names:
 // https://github.com/google/closure-compiler/issues/1776
@@ -88,12 +89,16 @@ export {
 };
 
 export { DataFrame } from './dataframe/dataframe';
+export { lit, col } from './dataframe/predicate';
+
 
 /* These exports are needed for the closure umd targets */
 try {
     const Arrow = eval('exports');
     if (typeof Arrow === 'object') {
         // string indexers tell closure compiler not to rename these properties
+        Arrow['lit'] = lit;
+        Arrow['col'] = col;
         Arrow['read'] = read;
         Arrow['readAsync'] = readAsync;
         Arrow['Table'] = Table;
diff --git a/js/src/dataframe/dataframe.ts b/js/src/dataframe/dataframe.ts
index dc3dd78156b9e..0dbb19bbbdd4e 100644
--- a/js/src/dataframe/dataframe.ts
+++ b/js/src/dataframe/dataframe.ts
@@ -2,15 +2,16 @@ import { Vector } from "../vector/vector";
 import { StructVector } from "../vector/struct";
 import { VirtualVector } from "../vector/virtual";
 
+import { Predicate } from "./predicate";
+
 export type NextFunc = (idx: number, cols: Vector[]) => void;
-export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
 
 export abstract class DataFrame {
     constructor(readonly lengths: Uint32Array) {}
     public abstract columns: Vector[];
     public abstract getBatch(batch: number): Vector[];
     public abstract scan(next: NextFunc): void;
-    public filter(predicate: PredicateFunc): DataFrame {
+    public filter(predicate: Predicate): DataFrame {
         return new FilteredDataFrame(this, predicate);
     }
 
@@ -120,7 +121,7 @@ class ChunkedDataFrame extends DataFrame {
 
 class FilteredDataFrame extends DataFrame {
     public columns: Vector[];
-    constructor (readonly parent: DataFrame, private predicate: PredicateFunc) {
+    constructor (readonly parent: DataFrame, private predicate: Predicate) {
         super(parent.lengths);
     }
 
@@ -138,10 +139,11 @@ class FilteredDataFrame extends DataFrame {
 
             // load batches
             const columns = this.parent.getBatch(batch);
+            const predicate = this.predicate.bind(columns);
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                if (this.predicate(idx, columns)) next(idx, columns);
+                if (predicate(idx, columns)) next(idx, columns);
             }
         }
     }
@@ -159,19 +161,20 @@ class FilteredDataFrame extends DataFrame {
 
             // load batches
             const columns = this.parent.getBatch(batch);
+            const predicate = this.predicate.bind(columns);
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                if (this.predicate(idx, columns)) ++sum;
+                if (predicate(idx, columns)) ++sum;
             }
         }
         return sum;
     }
 
-    filter(predicate: PredicateFunc): DataFrame {
+    filter(predicate: Predicate): DataFrame {
         return new FilteredDataFrame(
             this.parent,
-            (idx, cols) => this.predicate(idx, cols) && predicate(idx, cols)
+            this.predicate.and(predicate)
         );
     }
 }
diff --git a/js/src/dataframe/predicate.ts b/js/src/dataframe/predicate.ts
new file mode 100644
index 0000000000000..4438c0adbaa98
--- /dev/null
+++ b/js/src/dataframe/predicate.ts
@@ -0,0 +1,219 @@
+import { Vector } from "../vector/vector";
+
+/** Reads one value at row `idx` of a batch of columns. */
+export type ValueFunc<T> = (idx: number, cols: Vector[]) => T|null;
+/** Row-level test returned by `Predicate.bind` for one batch of columns. */
+export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
+
+/**
+ * An operand of a comparison. Arguments that are not already a `Value` are
+ * wrapped in a `Literal` by the builder methods below.
+ */
+export abstract class Value<T> {
+    /** Builds the predicate `this == other`. */
+    eq(other: Value<T>|T): Predicate {
+        if (!(other instanceof Value)) other = new Literal(other);
+        return new Equals(this, other);
+    }
+    /** Builds the predicate `this <= other`. */
+    lteq(other: Value<T>|T): Predicate {
+        if (!(other instanceof Value)) other = new Literal(other);
+        return new LTeq(this, other);
+    }
+    /** Builds the predicate `this >= other`. */
+    gteq(other: Value<T>|T): Predicate {
+        if (!(other instanceof Value)) other = new Literal(other);
+        return new GTeq(this, other);
+    }
+}
+
+/** A constant operand. */
+class Literal<T = any> extends Value<T> {
+    constructor(public v: T) { super(); }
+}
+
+/** A column operand, resolved by name against the vectors passed to `bind`. */
+class Col<T = any> extends Value<T> {
+    vector: Vector;
+    colidx: number;
+
+    constructor(public name: string) { super(); }
+
+    /**
+     * Locates this column in `cols` and returns its `get` method bound to it.
+     * Throws if no vector in `cols` has the name `this.name`.
+     */
+    bind(cols: Vector[]) {
+        // Compare against undefined rather than testing truthiness: index 0 is
+        // a valid cached result, and -1 must never be cached after a failure.
+        if (this.colidx === undefined) {
+            // Assume column index doesn't change between calls to bind
+            //this.colidx = cols.findIndex(v => v.name.indexOf(this.name) != -1);
+            let found = -1;
+            for (let idx = -1; ++idx < cols.length;) {
+                if (cols[idx].name === this.name) {
+                    found = idx;
+                    break;
+                }
+            }
+            if (found < 0) throw new Error(`Failed to bind Col "${this.name}"`);
+            this.colidx = found;
+        }
+        this.vector = cols[this.colidx];
+        return this.vector.get.bind(this.vector);
+    }
+
+    emitString() { return `cols[${this.colidx}].get(idx)`; }
+}
+
+/**
+ * A boolean expression over a batch of columns. `bind` is called once per
+ * batch and returns the function evaluated for each row.
+ */
+export abstract class Predicate {
+    abstract bind(cols: Vector[]): PredicateFunc;
+    and(expr: Predicate): Predicate { return new And(this, expr); }
+    or(expr: Predicate): Predicate { return new Or(this, expr); }
+    /** Lists the conjuncts of this predicate; `And` flattens its operands. */
+    ands(): Predicate[] { return [this]; }
+}
+
+/** A binary comparison between two `Value` operands. */
+abstract class ComparisonPredicate extends Predicate {
+    constructor(public readonly left: Value<any>, public readonly right: Value<any>) {
+        super();
+    }
+
+    /** Picks the binder matching the operand kinds (literal or column). */
+    bind(cols: Vector[]) {
+        if (this.left instanceof Literal) {
+            if (this.right instanceof Literal) {
+                return this._bindLitLit(cols, this.left, this.right);
+            } else { // right is a Col
+                // Keep the operand order: `lit <= col` is not `col <= lit`.
+                return this._bindLitCol(cols, this.left, this.right as Col);
+            }
+        } else { // left is a Col
+            if (this.right instanceof Literal) {
+                return this._bindColLit(cols, this.left as Col, this.right);
+            } else { // right is a Col
+                return this._bindColCol(cols, this.left as Col, this.right as Col);
+            }
+        }
+    }
+
+    protected abstract _bindLitLit(cols: Vector[], left: Literal, right: Literal): PredicateFunc;
+    protected abstract _bindColCol(cols: Vector[], left: Col, right: Col): PredicateFunc;
+    protected abstract _bindColLit(cols: Vector[], col: Col, lit: Literal): PredicateFunc;
+    protected abstract _bindLitCol(cols: Vector[], lit: Literal, col: Col): PredicateFunc;
+}
+
+/** A predicate combining two sub-predicates. */
+abstract class CombinationPredicate extends Predicate {
+    constructor(public readonly left: Predicate, public readonly right: Predicate) {
+        super();
+    }
+}
+
+/** Logical AND of two predicates (short-circuits on the left operand). */
+class And extends CombinationPredicate {
+    bind(cols: Vector[]) {
+        const left = this.left.bind(cols);
+        const right = this.right.bind(cols);
+        return (idx: number, cols: Vector[]) => left(idx, cols) && right(idx, cols);
+    }
+    ands() : Predicate[] { return this.left.ands().concat(this.right.ands()); }
+}
+
+/** Logical OR of two predicates (short-circuits on the left operand). */
+class Or extends CombinationPredicate {
+    bind(cols: Vector[]) {
+        const left = this.left.bind(cols);
+        const right = this.right.bind(cols);
+        return (idx: number, cols: Vector[]) => left(idx, cols) || right(idx, cols);
+    }
+}
+
+/** `left == right`, using loose equality. */
+class Equals extends ComparisonPredicate {
+    protected _bindLitLit(_: Vector[], left: Literal, right: Literal): PredicateFunc {
+        const rtrn: boolean = left.v == right.v;
+        return () => rtrn;
+    }
+
+    protected _bindColCol(cols: Vector[], left: Col, right: Col): PredicateFunc {
+        const left_func = left.bind(cols);
+        const right_func = right.bind(cols);
+        return (idx: number, cols: Vector[]) => left_func(idx, cols) == right_func(idx, cols);
+    }
+
+    protected _bindColLit(cols: Vector[], col: Col, lit: Literal): PredicateFunc {
+        const col_func = col.bind(cols);
+        return (idx: number, cols: Vector[]) => col_func(idx, cols) == lit.v;
+    }
+
+    protected _bindLitCol(cols: Vector[], lit: Literal, col: Col): PredicateFunc {
+        // Equality is symmetric, so the column/literal binder can be reused.
+        return this._bindColLit(cols, col, lit);
+    }
+}
+
+/** `left <= right`. */
+class LTeq extends ComparisonPredicate {
+    protected _bindLitLit(_: Vector[], left: Literal, right: Literal): PredicateFunc {
+        const rtrn: boolean = left.v <= right.v;
+        return () => rtrn;
+    }
+
+    protected _bindColCol(cols: Vector[], left: Col, right: Col): PredicateFunc {
+        const left_func = left.bind(cols);
+        const right_func = right.bind(cols);
+        return (idx: number, cols: Vector[]) => left_func(idx, cols) <= right_func(idx, cols);
+    }
+
+    protected _bindColLit(cols: Vector[], col: Col, lit: Literal): PredicateFunc {
+        const col_func = col.bind(cols);
+        return (idx: number, cols: Vector[]) => col_func(idx, cols) <= lit.v;
+    }
+
+    protected _bindLitCol(cols: Vector[], lit: Literal, col: Col): PredicateFunc {
+        const col_func = col.bind(cols);
+        return (idx: number, cols: Vector[]) => lit.v <= col_func(idx, cols);
+    }
+}
+
+/** `left >= right`. */
+class GTeq extends ComparisonPredicate {
+    protected _bindLitLit(_: Vector[], left: Literal, right: Literal): PredicateFunc {
+        const rtrn: boolean = left.v >= right.v;
+        return () => rtrn;
+    }
+
+    protected _bindColCol(cols: Vector[], left: Col, right: Col): PredicateFunc {
+        const left_func = left.bind(cols);
+        const right_func = right.bind(cols);
+        return (idx: number, cols: Vector[]) => left_func(idx, cols) >= right_func(idx, cols);
+    }
+
+    protected _bindColLit(cols: Vector[], col: Col, lit: Literal): PredicateFunc {
+        const col_func = col.bind(cols);
+        return (idx: number, cols: Vector[]) => col_func(idx, cols) >= lit.v;
+    }
+
+    protected _bindLitCol(cols: Vector[], lit: Literal, col: Col): PredicateFunc {
+        const col_func = col.bind(cols);
+        return (idx: number, cols: Vector[]) => lit.v >= col_func(idx, cols);
+    }
+    //eval(idx: number, cols: Vector[]) {
+    //    return this.left.eval(idx, cols) >= this.right.eval(idx, cols);
+    //}
+    //emitString() {
+    //    return `${this.left.emitString()} >= ${this.right.emitString()}`
+    //}
+    //createDictionaryEval(schema, lit: Literal, col: Col): (idx: number, cols: Vector[]) => boolean {
+    //    return this.eval;
+    //}
+}
+
+export function lit(n: number): Value<any> { return new Literal(n); }
+export function col(n: string): Value<any> { return new Col(n); }