From aa999f87f16456145c29ac7c3ea21e13a9bdf11b Mon Sep 17 00:00:00 2001
From: Brian Hulette
Date: Wed, 10 Jan 2018 14:38:49 -0500
Subject: [PATCH] Add DictionaryVector optimization for equals predicate

---
 js/perf/table_config.js       |  2 +-
 js/src/dataframe/predicate.ts | 26 +++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/js/perf/table_config.js b/js/perf/table_config.js
index 06c9198353b80..7bface6d2cdde 100644
--- a/js/perf/table_config.js
+++ b/js/perf/table_config.js
@@ -25,7 +25,7 @@ const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.a
 tests = [
   {col: 0, test: 'gteq', value: 0 },
   {col: 1, test: 'gteq', value: 0 },
-  //{col: 2, test: 'eq', value: 'Seattle'},
+  {col: 2, test: 'eq', value: 'Seattle'},
 ]
 
 for (const filename of filenames) {
diff --git a/js/src/dataframe/predicate.ts b/js/src/dataframe/predicate.ts
index 4438c0adbaa98..263b8646d71fc 100644
--- a/js/src/dataframe/predicate.ts
+++ b/js/src/dataframe/predicate.ts
@@ -1,4 +1,5 @@
 import { Vector } from "../vector/vector";
+import { DictionaryVector } from "../vector/dictionary";
 
 export type ValueFunc = (idx: number, cols: Vector[]) => T|null;
 export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
@@ -118,7 +119,30 @@ class Equals extends ComparisonPredicate {
 
     protected _bindColLit(cols: Vector[], col: Col , lit: Literal ): PredicateFunc {
         const col_func = col.bind(cols);
-        return (idx: number, cols: Vector[]) => col_func(idx, cols) == lit.v;
+        if (col.vector instanceof DictionaryVector) {
+            // Resolve `lit.v` to its dictionary key once, so each row needs only a
+            // key comparison. Assume that there is only one key with that value.
+            const vector = col.vector; // keep `key` paired with this vector's dictionary
+            const dictionary = vector.data;
+            let key = 0;
+            while (key < dictionary.length && dictionary.get(key) !== lit.v) {
+                key++;
+            }
+
+            if (key === dictionary.length) {
+                // the value doesn't exist in the dictionary - always return
+                // false
+                // TODO: special-case of PredicateFunc that encapsulates this
+                // "always false" behavior. That way filtering operations don't
+                // have to bother checking
+                return () => false;
+            } else {
+                return (idx: number) => vector.getKey(idx) === key;
+            }
+        } else {
+            // Not a DictionaryVector: compare each row's value to the literal.
+            return (idx: number, cols: Vector[]) => col_func(idx, cols) == lit.v;
+        }
     }
 }
 