From 99e58da5d58b8587657413b4bcca14a773d6de36 Mon Sep 17 00:00:00 2001
From: Paul Taylor <paul.e.taylor@me.com>
Date: Thu, 11 Jan 2018 18:35:05 -0500
Subject: [PATCH 01/19] ARROW-1979: [JS] Fix JS builds hanging in es2015

Also fixes [ARROW-1903](https://issues.apache.org/jira/browse/ARROW-1903)

Author: Paul Taylor <paul.e.taylor@me.com>

Closes #1471 from trxcllnt/fix-js-es2015-builds and squashes the following commits:

62db3381 [Paul Taylor] update dependencies and add es6+ umd targets to jest transform ignore patterns to fix ci
6ff18e94 [Paul Taylor] ship es2015 commonJS in main package to avoid confusion
74e828af [Paul Taylor] fix typings issues (ARROW-1903)
---
 js/README.md                          |  2 +-
 js/gulp/arrow-task.js                 |  8 +++---
 js/gulp/package-task.js               |  6 ++---
 js/gulp/test-task.js                  |  2 +-
 js/package.json                       | 38 +++++++++++++--------------
 js/src/text-encoding-utf-8.d.ts       |  4 ---
 js/src/vector/numeric.ts              |  7 ++---
 js/src/vector/virtual.ts              |  2 +-
 js/test/Arrow.ts                      |  2 +-
 js/test/integration/validate-tests.ts |  2 +-
 10 files changed, 35 insertions(+), 38 deletions(-)
 delete mode 100644 js/src/text-encoding-utf-8.d.ts

diff --git a/js/README.md b/js/README.md
index e58d335cd0d42..b427923e37ea1 100644
--- a/js/README.md
+++ b/js/README.md
@@ -178,7 +178,7 @@ The base `apache-arrow` package includes all the compilation targets for conveni
 The targets are also published under the `@apache-arrow` namespace:
 
 ```sh
-npm install apache-arrow # <-- combined es5/CommonJS + UMD, es2015/ESModules + UMD, and TypeScript package
+npm install apache-arrow # <-- combined es5/UMD, es2015/CommonJS/ESModules/UMD, and TypeScript package
 npm install @apache-arrow/ts # standalone TypeScript package
 npm install @apache-arrow/es5-cjs # standalone es5/CommonJS package
 npm install @apache-arrow/es5-esm # standalone es5/ESModules package
diff --git a/js/gulp/arrow-task.js b/js/gulp/arrow-task.js
index cc33ee14497b6..d1e8046e67ab9 100644
--- a/js/gulp/arrow-task.js
+++ b/js/gulp/arrow-task.js
@@ -28,8 +28,8 @@ const { Observable, ReplaySubject } = require('rxjs');
 
 const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target, format) {
     const out = targetDir(target);
-    const srcGlob = `src/**/*.ts`;
-    const es5Glob = `${targetDir(`es5`, `cjs`)}/**/*.js`;
+    const dtsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.ts`;
+    const cjsGlob = `${targetDir(`es2015`, `cjs`)}/**/*.js`;
     const esmGlob = `${targetDir(`es2015`, `esm`)}/**/*.js`;
     const es5UmdGlob = `${targetDir(`es5`, `umd`)}/**/*.js`;
     const es5UmdMaps = `${targetDir(`es5`, `umd`)}/**/*.map`;
@@ -38,8 +38,8 @@ const arrowTask = ((cache) => memoizeTask(cache, function copyMain(target, forma
     const ch_ext = (ext) => gulpRename((p) => { p.extname = ext; });
     const append = (ap) => gulpRename((p) => { p.basename += ap; });
     return Observable.forkJoin(
-      observableFromStreams(gulp.src(srcGlob), gulp.dest(out)), // copy src ts files
-      observableFromStreams(gulp.src(es5Glob), gulp.dest(out)), // copy es5 cjs files
+      observableFromStreams(gulp.src(dtsGlob), gulp.dest(out)), // copy d.ts files
+      observableFromStreams(gulp.src(cjsGlob), gulp.dest(out)), // copy es2015 cjs files
       observableFromStreams(gulp.src(esmGlob), ch_ext(`.mjs`), gulp.dest(out)), // copy es2015 esm files and rename to `.mjs`
       observableFromStreams(gulp.src(es5UmdGlob), append(`.es5.min`), gulp.dest(out)), // copy es5 umd files and add `.min`
       observableFromStreams(gulp.src(es5UmdMaps),                     gulp.dest(out)), // copy es5 umd sourcemap files, but don't rename
diff --git a/js/gulp/package-task.js b/js/gulp/package-task.js
index fc959643503bd..2976d0ad45d09 100644
--- a/js/gulp/package-task.js
+++ b/js/gulp/package-task.js
@@ -46,8 +46,8 @@ const createMainPackageJson = (target, format) => (orig) => ({
     name: npmPkgName,
     main: mainExport,
     module: `${mainExport}.mjs`,
-    browser: `${mainExport}.es5.min.js`,
-    [`browser:es2015`]: `${mainExport}.es2015.min.js`,
+    dist: `${mainExport}.es5.min.js`,
+    [`dist:es2015`]: `${mainExport}.es2015.min.js`,
     [`@std/esm`]: { esm: `mjs` }
 });
   
@@ -67,7 +67,7 @@ const createScopedPackageJSON = (target, format) => (({ name, ...orig }) =>
         (xs, key) => ({ ...xs, [key]: xs[key] || orig[key] }),
         { name: `${npmOrgName}/${packageName(target, format)}`,
           version: undefined, main: `${mainExport}.js`, types: `${mainExport}.d.ts`,
-          browser: undefined, [`browser:es2015`]: undefined, module: undefined, [`@std/esm`]: undefined }
+          dist: undefined, [`dist:es2015`]: undefined, module: undefined, [`@std/esm`]: undefined }
       )
     )
 );
diff --git a/js/gulp/test-task.js b/js/gulp/test-task.js
index f21aaf2364d03..ab280b092635c 100644
--- a/js/gulp/test-task.js
+++ b/js/gulp/test-task.js
@@ -34,7 +34,7 @@ argv.update && jestArgv.push(`-u`);
 argv.verbose && jestArgv.push(`--verbose`);
 argv.coverage && jestArgv.push(`--coverage`);
 
-const debugArgv = [`--runInBand`, `--env`, `jest-environment-node-debug`];
+const debugArgv = [`--runInBand`, `--env`, `node-debug`];
 const jest = require.resolve(path.join(`..`, `node_modules`, `.bin`, `jest`));
 const testOptions = {
     env: { ...process.env },
diff --git a/js/package.json b/js/package.json
index 3903d1eedc442..d68e7a6279e61 100644
--- a/js/package.json
+++ b/js/package.json
@@ -49,10 +49,8 @@
     "gulpfile.js",
     "npm-release.sh"
   ],
-  "peerDependencies": {
-    "command-line-usage": "4.0.1"
-  },
   "dependencies": {
+    "@types/text-encoding-utf-8": "1.0.1",
     "command-line-args": "4.0.7",
     "command-line-usage": "4.0.2",
     "flatbuffers": "trxcllnt/flatbuffers-esm",
@@ -61,45 +59,44 @@
     "tslib": "1.8.1"
   },
   "devDependencies": {
-    "@std/esm": "0.18.0",
+    "@std/esm": "0.19.1",
     "@types/flatbuffers": "1.6.5",
     "@types/glob": "5.0.34",
-    "@types/jest": "21.1.8",
-    "@types/node": "8.5.0",
-    "@types/text-encoding": "0.0.32",
+    "@types/jest": "22.0.1",
+    "@types/node": "9.3.0",
     "ast-types": "0.10.1",
     "benchmark": "2.1.4",
     "coveralls": "3.0.0",
     "del": "3.0.0",
-    "esdoc": "1.0.3",
+    "esdoc": "1.0.4",
     "esdoc-standard-plugin": "1.0.0",
     "glob": "7.1.2",
-    "google-closure-compiler": "20171203.0.0",
+    "google-closure-compiler": "20180101.0.0",
     "gulp": "github:gulpjs/gulp#6d71a658c61edb3090221579d8f97dbe086ba2ed",
     "gulp-json-transform": "0.4.5",
     "gulp-rename": "1.2.2",
-    "gulp-sourcemaps": "2.6.1",
+    "gulp-sourcemaps": "2.6.3",
     "gulp-transform-js-ast": "1.0.2",
     "gulp-typescript": "3.2.3",
     "ix": "2.3.4",
-    "jest": "21.2.1",
+    "jest": "22.0.5",
     "jest-environment-node-debug": "2.0.0",
     "json": "9.0.6",
-    "lerna": "2.5.1",
+    "lerna": "2.6.0",
     "lint-staged": "6.0.0",
-    "merge2": "1.2.0",
+    "merge2": "1.2.1",
     "mkdirp": "0.5.1",
     "npm-run-all": "4.1.2",
     "pump": "1.0.2",
     "rimraf": "2.6.2",
-    "rxjs": "5.5.5",
+    "rxjs": "5.5.6",
     "shx": "0.2.2",
     "source-map-loader": "0.2.3",
     "trash": "4.2.1",
-    "ts-jest": "21.2.4",
-    "tslint": "5.8.0",
+    "ts-jest": "22.0.1",
+    "tslint": "5.9.1",
     "typescript": "2.6.2",
-    "uglifyjs-webpack-plugin": "1.1.2",
+    "uglifyjs-webpack-plugin": "1.1.6",
     "webpack": "3.10.0",
     "xml2js": "0.4.19"
   },
@@ -134,9 +131,12 @@
       "/node_modules/"
     ],
     "transform": {
-      ".(ts|tsx)": "<rootDir>/node_modules/ts-jest/preprocessor.js",
-      ".(js|jsx)": "<rootDir>/node_modules/babel-jest/build/index.js"
+      ".(ts|tsx)": "./node_modules/ts-jest/preprocessor.js",
+      ".(js|jsx)": "./node_modules/babel-jest/build/index.js"
     },
+    "transformIgnorePatterns": [
+      "/node_modules/", "/(es2015|esnext)\/umd/"
+    ],
     "testRegex": "(.*(-|\\.)(test|spec)s?)\\.(ts|tsx|js)$"
   }
 }
diff --git a/js/src/text-encoding-utf-8.d.ts b/js/src/text-encoding-utf-8.d.ts
deleted file mode 100644
index 68ba4dfd9a346..0000000000000
--- a/js/src/text-encoding-utf-8.d.ts
+++ /dev/null
@@ -1,4 +0,0 @@
-declare module 'text-encoding-utf-8' {
-    import * as TextEncoding from 'text-encoding';
-    export = TextEncoding;
-}
diff --git a/js/src/vector/numeric.ts b/js/src/vector/numeric.ts
index fe4767809f465..830d6082bcc4a 100644
--- a/js/src/vector/numeric.ts
+++ b/js/src/vector/numeric.ts
@@ -34,10 +34,10 @@ export class NumericVector<T, TArray extends TypedArray> extends Vector<T> {
     concat(...vectors: Vector<T>[]): Vector<T> {
         return new VirtualVector(this.data.constructor as TypedArrayConstructor, this, ...vectors);
     }
-    slice(start?: number, end?: number) {
+    slice<R = TArray>(start?: number, end?: number): R {
         const { data, stride } = this, from = start! | 0;
         const to = end === undefined ? data.length : Math.max(end | 0, from);
-        return data.subarray(Math.min(from, to) * stride | 0, to * stride | 0);
+        return data.subarray(Math.min(from, to) * stride | 0, to * stride | 0) as any as R;
     }
 }
 
@@ -49,7 +49,8 @@ export class FixedWidthNumericVector<T, TArray extends TypedArray> extends Numer
 
 export class BoolVector extends NumericVector<boolean, Uint8Array> {
     static pack(values: Iterable<any>) {
-        let xs = [], n, i = 0;
+        let n = 0, i = 0;
+        let xs: number[] = [];
         let bit = 0, byte = 0;
         for (const value of values) {
             value && (byte |= 1 << bit);
diff --git a/js/src/vector/virtual.ts b/js/src/vector/virtual.ts
index 6ec3a8eef9f4d..42db78706db51 100644
--- a/js/src/vector/virtual.ts
+++ b/js/src/vector/virtual.ts
@@ -93,7 +93,7 @@ export class VirtualVector<T> implements Vector<T> {
                 // this is a significant improvement as we avoid the memcpy 🎉
                 if ((source.length / vector.stride | 0) < total) {
                     let vectorsLength = vectors.length;
-                    let count = 0, length = 0, sources = [];
+                    let count = 0, length = 0, sources = [] as any[];
                     do {
                         sources.push(source);
                         length += source.length;
diff --git a/js/test/Arrow.ts b/js/test/Arrow.ts
index 87641e52bf3f8..f2c4e930f92e4 100644
--- a/js/test/Arrow.ts
+++ b/js/test/Arrow.ts
@@ -16,7 +16,7 @@
 // under the License.
 
 /* tslint:disable */
-// Dynamically load an Ix target build based on command line arguments
+// Dynamically load an Arrow target build based on command line arguments
 
 const path = require('path');
 const target = process.env.TEST_TARGET!;
diff --git a/js/test/integration/validate-tests.ts b/js/test/integration/validate-tests.ts
index c8778ba2b33c2..c612d62ad0c04 100644
--- a/js/test/integration/validate-tests.ts
+++ b/js/test/integration/validate-tests.ts
@@ -37,7 +37,7 @@ const arrowBuffers: Uint8Array[] = [fs.readFileSync(arrowPath)];
 
 import Arrow from '../Arrow';
 import { zip } from 'ix/iterable/zip';
-import { toArray } from 'ix/iterable/toArray';
+import { toArray } from 'ix/iterable/toarray';
 
 const { Table, read } = Arrow;
 

From a1edac2095b43fa93cfdff99f1aee900f68af4cf Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Fri, 5 Jan 2018 11:47:42 -0500
Subject: [PATCH 02/19] Add perf tests for table scans

---
 js/generate.py          | 36 +++++++++++++++++++
 js/perf/index.js        | 78 +++++++++++++++++++++++++++++++++++++++++
 js/perf/table_config.js | 36 +++++++++++++++++++
 3 files changed, 150 insertions(+)
 create mode 100644 js/generate.py
 create mode 100644 js/perf/table_config.js

diff --git a/js/generate.py b/js/generate.py
new file mode 100644
index 0000000000000..bf663fb0b1f9f
--- /dev/null
+++ b/js/generate.py
@@ -0,0 +1,36 @@
+import pyarrow as pa
+import random
+import numpy as np
+import pandas as pd
+
+
+cities = [u'Charlottesville', u'New York', u'San Francisco', u'Seattle', u'Terre Haute', u'Washington, DC']
+
+def generate_batch(batch_len):
+    return pa.RecordBatch.from_arrays([
+        pa.Array.from_pandas(pd.Series(np.random.uniform(-90,90,batch_len), dtype="float32")),
+        pa.Array.from_pandas(pd.Series(np.random.uniform(-180,180,batch_len), dtype="float32")),
+        pa.Array.from_pandas(pd.Categorical((random.choice(cities) for i in range(batch_len)), cities)),
+        pa.Array.from_pandas(pd.Categorical((random.choice(cities) for i in range(batch_len)), cities))
+    ], ['lat', 'lng', 'origin', 'destination'])
+
+def write_record_batches(fd, batch_len, num_batches):
+    writer = pa.ipc.RecordBatchStreamWriter(fd, generate_batch(1).schema)
+    for batch in range(num_batches):
+        writer.write_batch(generate_batch(batch_len))
+
+    writer.close()
+
+if __name__ == "__main__":
+    import argparse
+    # Command-line entry point: writes randomly generated record batches to the given file.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filename', help='path of the Arrow stream file to write')
+    parser.add_argument('-n', '--num-batches', help='number of batches', type=int, default=10)
+    parser.add_argument('-b', '--batch-size', help='size of each batch', type=int, default=100000)
+
+    args = parser.parse_args()
+
+    print("Writing {} {}-element batches to '{}'".format(args.num_batches, args.batch_size, args.filename))
+    with open(args.filename, 'wb') as fd:  # binary mode: the record batch stream is bytes, not text
+        write_record_batches(fd, args.batch_size, args.num_batches)
diff --git a/js/perf/index.js b/js/perf/index.js
index 9eac40e64ac71..03501913d8155 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -41,6 +41,21 @@ for (let { name, buffers} of config) {
     suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
 }
 
+for (let {name, buffers, tests} of require('./table_config')) {
+    const tableIterateSuite = new Benchmark.Suite(`Table Iterate ${name}`, { async: true });
+    const tableCountBySuite = new Benchmark.Suite(`Table Count By ${name}`, { async: true });
+    const vectorCountBySuite = new Benchmark.Suite(`Vector Count By ${name}`, { async: true });
+    const table = Table.from(buffers);
+
+    tableIterateSuite.add(createTableIterateTest(table));
+    for (test of tests) {
+        tableCountBySuite.add(createTableCountByTest(table, test.col, test.test, test.value))
+        vectorCountBySuite.add(createVectorCountByTest(table.columns[test.col], test.test, test.value))
+    }
+
+    suites.push(tableIterateSuite, tableCountBySuite, vectorCountBySuite)
+}
+
 console.log('Running apache-arrow performance tests...\n');
 
 run();
@@ -109,3 +124,66 @@ function createGetByIndexTest(vector) {
         }
     };
 }
+
+function createVectorCountByTest(vector, test, value) {
+    let op;
+    if (test == 'gteq') {
+        op = function () {
+            sum = 0;
+            for (cell of vector) {
+                sum += (cell >= value) // the boolean result is added as 0 or 1
+            }
+        }
+    } else if (test == 'eq') {
+        op = function () {
+            sum = 0;
+            for (cell of vector) {
+                sum += (cell == value) // the boolean result is added as 0 or 1
+            }
+        }
+    } else {
+        throw new Error(`Unrecognized test "${test}"`); // report the offending test name
+    }
+
+    return {
+        async: true,
+        name: `name: '${vector.name}', length: ${vector.length}, type: ${vector.type}, test: ${test}, value: ${value}`,
+        fn: op
+    };
+}
+
+function createTableIterateTest(table) {
+    let row;
+    return {
+        async: true,
+        name: `length: ${table.length}`,
+        fn() { for (row of table) {} }
+    };
+}
+
+function createTableCountByTest(table, column, test, value) {
+    let op;
+    if (test == 'gteq') {
+        op = function () {
+            sum = 0;
+            for (row of table) {
+                sum += (row.get(column) >= value)
+            }
+        }
+    } else if (test == 'eq') {
+        op = function() {
+            sum = 0;
+            for (row of table) {
+                sum += (row.get(column) == value)
+            }
+        }
+    } else {
+        throw new Error(`Unrecognized test "${test}"`);
+    }
+
+    return {
+        async: true,
+        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
+        fn: op
+    };
+}
diff --git a/js/perf/table_config.js b/js/perf/table_config.js
new file mode 100644
index 0000000000000..7bface6d2cdde
--- /dev/null
+++ b/js/perf/table_config.js
@@ -0,0 +1,36 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+const fs = require('fs');
+const path = require('path');
+const glob = require('glob');
+
+const config = [];
+const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
+
+tests = [
+    {col: 0, test: 'gteq', value: 0        },
+    {col: 1, test: 'gteq', value: 0        },
+    {col: 2, test:   'eq', value: 'Seattle'},
+]
+
+for (const filename of filenames) {
+    const { name } = path.parse(filename);
+    config.push({ name, buffers: [fs.readFileSync(filename)], tests });
+}
+
+module.exports = config;

From 30f0330997602a9e817b536b2bfd0f8dbaf2aa4d Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Tue, 9 Jan 2018 17:24:25 -0500
Subject: [PATCH 03/19] Add basic DataFrame impl ...

... and a bunch of performance tests for various scanning approaches
---
 js/perf/index.js              | 171 ++++++++++++++++++++++++++++------
 js/perf/table_config.js       |   2 +-
 js/src/Arrow.ts               |   6 ++
 js/src/dataframe/dataframe.ts | 109 ++++++++++++++++++++++
 js/src/vector/virtual.ts      |   3 +
 5 files changed, 262 insertions(+), 29 deletions(-)
 create mode 100644 js/src/dataframe/dataframe.ts

diff --git a/js/perf/index.js b/js/perf/index.js
index 03501913d8155..95396a986de01 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -16,44 +16,52 @@
 // under the License.
 
 // Use the ES5 UMD target as perf baseline
-// const { Table, readVectors } = require('../targets/es5/umd');
-// const { Table, readVectors } = require('../targets/es5/cjs');
-const { Table, readVectors } = require('../targets/es2015/umd');
-// const { Table, readVectors } = require('../targets/es2015/cjs');
+// const { DataFrame, Table, readVectors } = require('../targets/es5/umd');
+// const { DataFrame, Table, readVectors } = require('../targets/es5/cjs');
+// const { DataFrame, Table, readVectors } = require('../targets/es2015/umd');
+const { DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
 
 const config = require('./config');
 const Benchmark = require('benchmark');
 
 const suites = [];
 
-for (let { name, buffers} of config) {
-    const parseSuite = new Benchmark.Suite(`Parse ${name}`, { async: true });
-    const sliceSuite = new Benchmark.Suite(`Slice ${name} vectors`, { async: true });
-    const iterateSuite = new Benchmark.Suite(`Iterate ${name} vectors`, { async: true });
-    const getByIndexSuite = new Benchmark.Suite(`Get ${name} values by index`, { async: true });
-    parseSuite.add(createFromTableTest(name, buffers));
-    parseSuite.add(createReadVectorsTest(name, buffers));
-    for (const vector of Table.from(buffers).columns) {
-        sliceSuite.add(createSliceTest(vector));
-        iterateSuite.add(createIterateTest(vector));
-        getByIndexSuite.add(createGetByIndexTest(vector));
-    }
-    suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
-}
+//for (let { name, buffers} of config) {
+//    const parseSuite = new Benchmark.Suite(`Parse "${name}"`, { async: true });
+//    const sliceSuite = new Benchmark.Suite(`Slice "${name}" vectors`, { async: true });
+//    const iterateSuite = new Benchmark.Suite(`Iterate "${name}" vectors`, { async: true });
+//    const getByIndexSuite = new Benchmark.Suite(`Get "${name}" values by index`, { async: true });
+//    parseSuite.add(createFromTableTest(name, buffers));
+//    parseSuite.add(createReadVectorsTest(name, buffers));
+//    for (const vector of Table.from(buffers).columns) {
+//        sliceSuite.add(createSliceTest(vector));
+//        iterateSuite.add(createIterateTest(vector));
+//        getByIndexSuite.add(createGetByIndexTest(vector));
+//    }
+//    suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
+//}
 
 for (let {name, buffers, tests} of require('./table_config')) {
-    const tableIterateSuite = new Benchmark.Suite(`Table Iterate ${name}`, { async: true });
-    const tableCountBySuite = new Benchmark.Suite(`Table Count By ${name}`, { async: true });
-    const vectorCountBySuite = new Benchmark.Suite(`Vector Count By ${name}`, { async: true });
+    const tableIteratorSuite = new Benchmark.Suite(`Table Iterator "${name}"`, { async: true });
+    const tableCountSuite = new Benchmark.Suite(`Table Count "${name}"`, { async: true });
+    const dfIteratorSuite = new Benchmark.Suite(`DataFrame Iterator "${name}"`, { async: true });
+    const dfIteratorCountSuite = new Benchmark.Suite(`DataFrame Iterator Count "${name}"`, { async: true });
+    const dfDirectCountSuite = new Benchmark.Suite(`DataFrame Direct Count "${name}"`, { async: true });
+    const dfScanCountSuite = new Benchmark.Suite(`DataFrame Scan Count "${name}"`, { async: true });
+    const vectorCountSuite = new Benchmark.Suite(`Vector Count "${name}"`, { async: true });
     const table = Table.from(buffers);
 
-    tableIterateSuite.add(createTableIterateTest(table));
+    tableIteratorSuite.add(createTableIteratorTest(table));
+    dfIteratorSuite.add(createDataFrameIteratorTest(table));
     for (test of tests) {
-        tableCountBySuite.add(createTableCountByTest(table, test.col, test.test, test.value))
-        vectorCountBySuite.add(createVectorCountByTest(table.columns[test.col], test.test, test.value))
+        tableCountSuite.add(createTableCountTest(table, test.col, test.test, test.value))
+        dfIteratorCountSuite.add(createDataFrameIteratorCountTest(table, test.col, test.test, test.value))
+        dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value))
+        dfScanCountSuite.add(createDataFrameScanCountTest(table, test.col, test.test, test.value))
+        vectorCountSuite.add(createVectorCountTest(table.columns[test.col], test.test, test.value))
     }
 
-    suites.push(tableIterateSuite, tableCountBySuite, vectorCountBySuite)
+    suites.push(tableIteratorSuite, tableCountSuite, dfIteratorSuite, dfIteratorCountSuite, dfDirectCountSuite, dfScanCountSuite, vectorCountSuite)
 }
 
 console.log('Running apache-arrow performance tests...\n');
@@ -125,7 +133,7 @@ function createGetByIndexTest(vector) {
     };
 }
 
-function createVectorCountByTest(vector, test, value) {
+function createVectorCountTest(vector, test, value) {
     let op;
     if (test == 'gteq') {
         op = function () {
@@ -152,7 +160,7 @@ function createVectorCountByTest(vector, test, value) {
     };
 }
 
-function createTableIterateTest(table) {
+function createTableIteratorTest(table) {
     let row;
     return {
         async: true,
@@ -161,7 +169,7 @@ function createTableIterateTest(table) {
     };
 }
 
-function createTableCountByTest(table, column, test, value) {
+function createTableCountTest(table, column, test, value) {
     let op;
     if (test == 'gteq') {
         op = function () {
@@ -187,3 +195,110 @@ function createTableCountByTest(table, column, test, value) {
         fn: op
     };
 }
+
+function createDataFrameIteratorTest(table) {
+    let df = DataFrame.from(table);
+    let idx;
+    return {
+        async: true,
+        name: `length: ${table.length}`,
+        fn() { for (idx of df) {} } // iterate the DataFrame built above, not the source table
+    };
+}
+
+function createDataFrameDirectCountTest(table, column, test, value) {
+    let df = DataFrame.from(table);
+    let op; // benchmark body, chosen below according to `test`
+    if (test == 'gteq') {
+        op = function () {
+            sum = 0;
+            for (let batch = -1; ++batch < df.lengths.length;) {
+                const length = df.lengths[batch];
+
+                // load the column vectors for this batch
+                const columns = df.getBatch(batch);
+
+                // visit every row index in this batch and count the matches
+                for (let idx = -1; ++idx < length;) {
+                    sum += (columns[column].get(idx) >= value);
+                }
+            }
+        }
+    } else if (test == 'eq') {
+        op = function() {
+            sum = 0;
+            for (let batch = -1; ++batch < df.lengths.length;) {
+                const length = df.lengths[batch];
+
+                // load the column vectors for this batch
+                const columns = df.getBatch(batch);
+
+                // visit every row index in this batch and count the matches
+                for (let idx = -1; ++idx < length;) {
+                    sum += (columns[column].get(idx) == value);
+                }
+            }
+        }
+    } else {
+        throw new Error(`Unrecognized test "${test}"`);
+    }
+
+    return {
+        async: true,
+        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
+        fn: op
+    };
+}
+
+function createDataFrameScanCountTest(table, column, test, value) {
+    let df = DataFrame.from(table);
+    let op; // benchmark body, chosen below according to `test`
+    if (test == 'gteq') {
+        op = function () {
+            sum = 0;
+            df.scan((idx, cols)=>{sum += cols[column].get(idx) >= value});
+        }
+    } else if (test == 'eq') {
+        op = function() {
+            sum = 0;
+            // no logging in the timed body: console output would skew this branch's timing
+            df.scan((idx, cols)=>{sum += cols[column].get(idx) == value});
+        }
+    } else {
+        throw new Error(`Unrecognized test "${test}"`);
+    }
+
+    return {
+        async: true,
+        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
+        fn: op
+    };
+}
+
+function createDataFrameIteratorCountTest(table, column, test, value) {
+    let df = DataFrame.from(table);
+    let op; // benchmark body, chosen below according to `test`
+    if (test == 'gteq') {
+        op = function () {
+            sum = 0;
+            for (idx of df) {
+                sum += (df.columns[column].get(idx) >= value); // the boolean result is added as 0 or 1
+            }
+        }
+    } else if (test == 'eq') {
+        op = function() {
+            sum = 0;
+            for (idx of df) {
+                sum += (df.columns[column].get(idx) == value); // the boolean result is added as 0 or 1
+            }
+        }
+    } else {
+        throw new Error(`Unrecognized test "${test}"`);
+    }
+
+    return {
+        async: true,
+        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
+        fn: op
+    };
+}
diff --git a/js/perf/table_config.js b/js/perf/table_config.js
index 7bface6d2cdde..06c9198353b80 100644
--- a/js/perf/table_config.js
+++ b/js/perf/table_config.js
@@ -25,7 +25,7 @@ const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.a
 tests = [
     {col: 0, test: 'gteq', value: 0        },
     {col: 1, test: 'gteq', value: 0        },
-    {col: 2, test:   'eq', value: 'Seattle'},
+    //{col: 2, test:   'eq', value: 'Seattle'},
 ]
 
 for (const filename of filenames) {
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index 3a8943434eece..a52deeb4992c0 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -45,12 +45,15 @@ import {
     TimestampVector,
 } from './vector/numeric';
 
+import { DataFrame } from './dataframe/dataframe';
+
 // closure compiler always erases static method names:
 // https://github.com/google/closure-compiler/issues/1776
 // set them via string indexers to save them from the mangler
 Table['from'] = Table.from;
 Table['fromAsync'] = Table.fromAsync;
 BoolVector['pack'] = BoolVector.pack;
+DataFrame['from'] = DataFrame.from;
 
 export { read, readAsync };
 export { Table, Vector, StructRow };
@@ -84,6 +87,8 @@ export {
     FixedSizeListVector,
 };
 
+export { DataFrame } from './dataframe/dataframe';
+
 /* These exports are needed for the closure umd targets */
 try {
     const Arrow = eval('exports');
@@ -93,6 +98,7 @@ try {
         Arrow['readAsync'] = readAsync;
         Arrow['Table'] = Table;
         Arrow['Vector'] = Vector;
+        Arrow['DataFrame'] = DataFrame;
         Arrow['StructRow'] = StructRow;
         Arrow['BoolVector'] = BoolVector;
         Arrow['ListVector'] = ListVector;
diff --git a/js/src/dataframe/dataframe.ts b/js/src/dataframe/dataframe.ts
new file mode 100644
index 0000000000000..ed58f174aa425
--- /dev/null
+++ b/js/src/dataframe/dataframe.ts
@@ -0,0 +1,109 @@
+import { Vector } from "../vector/vector";
+import { StructVector } from "../vector/struct";
+import { VirtualVector } from "../vector/virtual";
+
+export abstract class DataFrame {
+    public abstract columns: Vector<any>[];
+    public abstract getBatch(batch: number): Vector[];
+    public abstract scan(next: (idx: number, cols: Vector[])=>void): void;
+    static from(table: Vector<any>): DataFrame {
+        // There are two types of Vectors we might want to make into
+        // a ChunkedDataFrame:
+        //   1) a StructVector of all VirtualVectors
+        //   2) a VirtualVector of all StructVectors
+        if (table instanceof StructVector) {
+            if (table.columns.every((col) => col instanceof VirtualVector)) {
+                // ChunkedDataFrame case (1)
+                return new ChunkedDataFrame(table.columns as VirtualVector<any>[]);
+            } else {
+                return new SimpleDataFrame(table.columns)
+            }
+        } else if (table instanceof VirtualVector &&
+                   table.vectors.every((vec) => vec instanceof StructVector)) {
+            const structs = table.vectors as StructVector<any>[];
+            const rest: StructVector<any>[] = structs.slice(1);
+            const virtuals: VirtualVector<any>[] = structs[0].columns.map((vec, col_idx) => {
+                return vec.concat(...rest.map((vec) => vec.columns[col_idx]));
+            }) as VirtualVector<any>[];
+            // ChunkedDataFrame case (2)
+            return new ChunkedDataFrame(virtuals);
+        } else {
+            return new SimpleDataFrame([table]);
+        }
+    }
+}
+
+class SimpleDataFrame extends DataFrame {
+    readonly lengths: Uint32Array;
+    constructor(public columns: Vector<any>[]) {
+        super();
+        if (!this.columns.slice(1).every((v) => v.length === this.columns[0].length)) {
+            throw new Error("Attempted to create a DataFrame with un-aligned vectors");
+        }
+        this.lengths = new Uint32Array([0, this.columns[0].length]);
+    }
+
+    public getBatch() {
+        return this.columns;
+    }
+
+    public scan(next: (idx: number, cols: Vector[])=>void) {
+        for (let idx = -1; ++idx < this.lengths[1];) {
+            next(idx, this.columns)
+        }
+    }
+
+    *[Symbol.iterator]() {
+        for (let idx = -1; ++idx < this.lengths[1];) {
+            yield idx;
+        }
+    }
+}
+
+class ChunkedDataFrame extends DataFrame {
+    public columns: Vector<any>[];
+    readonly lengths: Uint32Array;
+    constructor(private virtuals: VirtualVector<any>[]) {
+        super();
+        const offsets = virtuals[0].offsets;
+        if (!this.virtuals.slice(1).every((v) => v.aligned(virtuals[0]))) {
+            throw new Error("Attempted to create a DataFrame with un-aligned vectors");
+        }
+        this.lengths = new Uint32Array(offsets.length);
+        offsets.forEach((offset, i) => {
+            this.lengths[i] = offsets[i+1] - offset;;
+        });
+    }
+
+    getBatch(batch: number): Vector[] {
+        return this.virtuals.map((virt) => virt.vectors[batch]);
+    }
+
+    scan(next: (idx: number, cols: Vector[])=>void) {
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
+
+            // load batches
+            const columns = this.getBatch(batch);
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                next(idx, columns)
+            }
+        }
+    }
+
+    *[Symbol.iterator]() {
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
+
+            // load batches
+            this.columns = this.getBatch(batch);
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                yield idx;
+            }
+        }
+    }
+}
diff --git a/js/src/vector/virtual.ts b/js/src/vector/virtual.ts
index 42db78706db51..9dec75254595f 100644
--- a/js/src/vector/virtual.ts
+++ b/js/src/vector/virtual.ts
@@ -115,6 +115,9 @@ export class VirtualVector<T> implements Vector<T> {
         }
         return new ArrayType(0);
     }
+    aligned(other: VirtualVector<any>): boolean {
+        return this.offsets.every((offset, i) => offset === other.offsets[i]);
+    }
 }
 
 function arraySet<T>(source: T[], target: T[], index: number) {

From 796f45dda65f049dc5f75c0564130dca4d733e71 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Wed, 10 Jan 2018 11:44:45 -0500
Subject: [PATCH 04/19] add DataFrame filter and count ops

---
 js/perf/index.js              | 23 +++++++-
 js/src/dataframe/dataframe.ts | 98 +++++++++++++++++++++++++++++------
 2 files changed, 105 insertions(+), 16 deletions(-)

diff --git a/js/perf/index.js b/js/perf/index.js
index 95396a986de01..74dbd872d8a6e 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -48,6 +48,7 @@ for (let {name, buffers, tests} of require('./table_config')) {
     const dfIteratorCountSuite = new Benchmark.Suite(`DataFrame Iterator Count "${name}"`, { async: true });
     const dfDirectCountSuite = new Benchmark.Suite(`DataFrame Direct Count "${name}"`, { async: true });
     const dfScanCountSuite = new Benchmark.Suite(`DataFrame Scan Count "${name}"`, { async: true });
+    const dfFilterCountSuite = new Benchmark.Suite(`DataFrame Filter Scan Count "${name}"`, { async: true });
     const vectorCountSuite = new Benchmark.Suite(`Vector Count "${name}"`, { async: true });
     const table = Table.from(buffers);
 
@@ -58,10 +59,11 @@ for (let {name, buffers, tests} of require('./table_config')) {
         dfIteratorCountSuite.add(createDataFrameIteratorCountTest(table, test.col, test.test, test.value))
         dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value))
         dfScanCountSuite.add(createDataFrameScanCountTest(table, test.col, test.test, test.value))
+        dfFilterCountSuite.add(createDataFrameFilterCountTest(table, test.col, test.test, test.value))
         vectorCountSuite.add(createVectorCountTest(table.columns[test.col], test.test, test.value))
     }
 
-    suites.push(tableIteratorSuite, tableCountSuite, dfIteratorSuite, dfIteratorCountSuite, dfDirectCountSuite, dfScanCountSuite, vectorCountSuite)
+    suites.push(tableIteratorSuite, tableCountSuite, dfIteratorSuite, dfIteratorCountSuite, dfDirectCountSuite, dfScanCountSuite, dfFilterCountSuite, vectorCountSuite)
 }
 
 console.log('Running apache-arrow performance tests...\n');
@@ -275,6 +277,25 @@ function createDataFrameScanCountTest(table, column, test, value) {
     };
 }
 
+function createDataFrameFilterCountTest(table, column, test, value) {
+    let df = DataFrame.from(table);
+    if (test == 'gteq') {
+        df = df.filter((idx, cols)=>cols[column].get(idx) >= value);
+    } else if (test == 'eq') {
+        df = df.filter((idx, cols)=>cols[column].get(idx) == value);
+    } else {
+        throw new Error(`Unrecognized test "${test}"`);
+    }
+
+    return {
+        async: true,
+        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
+        fn() {
+            df.count();
+        }
+    };
+}
+
 function createDataFrameIteratorCountTest(table, column, test, value) {
     let df = DataFrame.from(table);
 
diff --git a/js/src/dataframe/dataframe.ts b/js/src/dataframe/dataframe.ts
index ed58f174aa425..dc3dd78156b9e 100644
--- a/js/src/dataframe/dataframe.ts
+++ b/js/src/dataframe/dataframe.ts
@@ -2,10 +2,18 @@ import { Vector } from "../vector/vector";
 import { StructVector } from "../vector/struct";
 import { VirtualVector } from "../vector/virtual";
 
+export type NextFunc = (idx: number, cols: Vector[]) => void;
+export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
+
 export abstract class DataFrame {
+    constructor(readonly lengths: Uint32Array) {}
     public abstract columns: Vector<any>[];
     public abstract getBatch(batch: number): Vector[];
-    public abstract scan(next: (idx: number, cols: Vector[])=>void): void;
+    public abstract scan(next: NextFunc): void;
+    public filter(predicate: PredicateFunc): DataFrame {
+        return new FilteredDataFrame(this, predicate);
+    }
+
     static from(table: Vector<any>): DataFrame {
         // There are two types of Vectors we might want to make into
         // a ChunkedDataFrame:
@@ -31,23 +39,26 @@ export abstract class DataFrame {
             return new SimpleDataFrame([table]);
         }
     }
+
+    count(): number {
+        return this.lengths.reduce((acc, val) => acc + val);
+    }
 }
 
 class SimpleDataFrame extends DataFrame {
     readonly lengths: Uint32Array;
     constructor(public columns: Vector<any>[]) {
-        super();
+        super(new Uint32Array([0, columns[0].length]));
         if (!this.columns.slice(1).every((v) => v.length === this.columns[0].length)) {
             throw new Error("Attempted to create a DataFrame with un-aligned vectors");
         }
-        this.lengths = new Uint32Array([0, this.columns[0].length]);
     }
 
     public getBatch() {
         return this.columns;
     }
 
-    public scan(next: (idx: number, cols: Vector[])=>void) {
+    public scan(next: NextFunc) {
         for (let idx = -1; ++idx < this.lengths[1];) {
             next(idx, this.columns)
         }
@@ -62,24 +73,16 @@ class SimpleDataFrame extends DataFrame {
 
 class ChunkedDataFrame extends DataFrame {
     public columns: Vector<any>[];
-    readonly lengths: Uint32Array;
     constructor(private virtuals: VirtualVector<any>[]) {
-        super();
-        const offsets = virtuals[0].offsets;
-        if (!this.virtuals.slice(1).every((v) => v.aligned(virtuals[0]))) {
-            throw new Error("Attempted to create a DataFrame with un-aligned vectors");
-        }
-        this.lengths = new Uint32Array(offsets.length);
-        offsets.forEach((offset, i) => {
-            this.lengths[i] = offsets[i+1] - offset;;
-        });
+        super(ChunkedDataFrame.getLengths(virtuals));
+        this.virtuals = virtuals;
     }
 
     getBatch(batch: number): Vector[] {
         return this.virtuals.map((virt) => virt.vectors[batch]);
     }
 
-    scan(next: (idx: number, cols: Vector[])=>void) {
+    scan(next: NextFunc) {
         for (let batch = -1; ++batch < this.lengths.length;) {
             const length = this.lengths[batch];
 
@@ -106,4 +109,69 @@ class ChunkedDataFrame extends DataFrame {
             }
         }
     }
+
+    private static getLengths(virtuals: VirtualVector<any>[]): Uint32Array {
+        if (!virtuals.slice(1).every((v) => v.aligned(virtuals[0]))) {
+            throw new Error("Attempted to create a DataFrame with un-aligned vectors");
+        }
+        return new Uint32Array(virtuals[0].vectors.map((v)=>v.length));
+    }
+}
+
+class FilteredDataFrame extends DataFrame {
+    public columns: Vector<any>[];
+    constructor (readonly parent: DataFrame, private predicate: PredicateFunc) {
+        super(parent.lengths);
+    }
+
+    getBatch(batch: number): Vector[] {
+        return this.parent.getBatch(batch);
+    };
+
+    scan(next: NextFunc) {
+        // inlined version of this:
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) next(idx, columns);
+        // });
+        for (let batch = -1; ++batch < this.parent.lengths.length;) {
+            const length = this.parent.lengths[batch];
+
+            // load batches
+            const columns = this.parent.getBatch(batch);
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                if (this.predicate(idx, columns)) next(idx, columns);
+            }
+        }
+    }
+
+    count(): number {
+        // inlined version of this:
+        // let sum = 0;
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) ++sum;
+        // });
+        // return sum;
+        let sum = 0;
+        for (let batch = -1; ++batch < this.parent.lengths.length;) {
+            const length = this.parent.lengths[batch];
+
+            // load batches
+            const columns = this.parent.getBatch(batch);
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                if (this.predicate(idx, columns)) ++sum;
+            }
+        }
+        return sum;
+    }
+
+    filter(predicate: PredicateFunc): DataFrame {
+        return new FilteredDataFrame(
+            this.parent,
+            (idx, cols) => this.predicate(idx, cols) && predicate(idx, cols)
+        );
+    }
 }

From 4d9e8c0667a44f6e00bb882ff543eb290b0d7644 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Wed, 10 Jan 2018 13:36:29 -0500
Subject: [PATCH 05/19] Add concept of predicates for filtering dataframes

---
 js/perf/index.js              |  12 +--
 js/src/Arrow.ts               |   5 +
 js/src/dataframe/dataframe.ts |  17 ++--
 js/src/dataframe/predicate.ts | 171 ++++++++++++++++++++++++++++++++++
 4 files changed, 192 insertions(+), 13 deletions(-)
 create mode 100644 js/src/dataframe/predicate.ts

diff --git a/js/perf/index.js b/js/perf/index.js
index 74dbd872d8a6e..b5789e8b34c07 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -16,10 +16,10 @@
 // under the License.
 
 // Use the ES5 UMD target as perf baseline
-// const { DataFrame, Table, readVectors } = require('../targets/es5/umd');
-// const { DataFrame, Table, readVectors } = require('../targets/es5/cjs');
-// const { DataFrame, Table, readVectors } = require('../targets/es2015/umd');
-const { DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
+// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
+// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
+// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
+const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
 
 const config = require('./config');
 const Benchmark = require('benchmark');
@@ -280,9 +280,9 @@ function createDataFrameScanCountTest(table, column, test, value) {
 function createDataFrameFilterCountTest(table, column, test, value) {
     let df = DataFrame.from(table);
     if (test == 'gteq') {
-        df = df.filter((idx, cols)=>cols[column].get(idx) >= value);
+        df = df.filter(col(table.columns[column].name).gteq(value));
     } else if (test == 'eq') {
-        df = df.filter((idx, cols)=>cols[column].get(idx) == value);
+        df = df.filter(col(table.columns[column].name).eq(value));
     } else {
         throw new Error(`Unrecognized test "${test}"`);
     }
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index a52deeb4992c0..ce7235b8b13d4 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -46,6 +46,7 @@ import {
 } from './vector/numeric';
 
 import { DataFrame } from './dataframe/dataframe';
+import { lit, col } from './dataframe/predicate';
 
 // closure compiler always erases static method names:
 // https://github.com/google/closure-compiler/issues/1776
@@ -88,12 +89,16 @@ export {
 };
 
 export { DataFrame } from './dataframe/dataframe';
+export { lit, col } from './dataframe/predicate';
+
 
 /* These exports are needed for the closure umd targets */
 try {
     const Arrow = eval('exports');
     if (typeof Arrow === 'object') {
         // string indexers tell closure compiler not to rename these properties
+        Arrow['lit'] = lit;
+        Arrow['col'] = col;
         Arrow['read'] = read;
         Arrow['readAsync'] = readAsync;
         Arrow['Table'] = Table;
diff --git a/js/src/dataframe/dataframe.ts b/js/src/dataframe/dataframe.ts
index dc3dd78156b9e..0dbb19bbbdd4e 100644
--- a/js/src/dataframe/dataframe.ts
+++ b/js/src/dataframe/dataframe.ts
@@ -2,15 +2,16 @@ import { Vector } from "../vector/vector";
 import { StructVector } from "../vector/struct";
 import { VirtualVector } from "../vector/virtual";
 
+import { Predicate } from "./predicate"
+
 export type NextFunc = (idx: number, cols: Vector[]) => void;
-export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
 
 export abstract class DataFrame {
     constructor(readonly lengths: Uint32Array) {}
     public abstract columns: Vector<any>[];
     public abstract getBatch(batch: number): Vector[];
     public abstract scan(next: NextFunc): void;
-    public filter(predicate: PredicateFunc): DataFrame {
+    public filter(predicate: Predicate): DataFrame {
         return new FilteredDataFrame(this, predicate);
     }
 
@@ -120,7 +121,7 @@ class ChunkedDataFrame extends DataFrame {
 
 class FilteredDataFrame extends DataFrame {
     public columns: Vector<any>[];
-    constructor (readonly parent: DataFrame, private predicate: PredicateFunc) {
+    constructor (readonly parent: DataFrame, private predicate: Predicate) {
         super(parent.lengths);
     }
 
@@ -138,10 +139,11 @@ class FilteredDataFrame extends DataFrame {
 
             // load batches
             const columns = this.parent.getBatch(batch);
+            const predicate = this.predicate.bind(columns);
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                if (this.predicate(idx, columns)) next(idx, columns);
+                if (predicate(idx, columns)) next(idx, columns);
             }
         }
     }
@@ -159,19 +161,20 @@ class FilteredDataFrame extends DataFrame {
 
             // load batches
             const columns = this.parent.getBatch(batch);
+            const predicate = this.predicate.bind(columns);
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                if (this.predicate(idx, columns)) ++sum;
+                if (predicate(idx, columns)) ++sum;
             }
         }
         return sum;
     }
 
-    filter(predicate: PredicateFunc): DataFrame {
+    filter(predicate: Predicate): DataFrame {
         return new FilteredDataFrame(
             this.parent,
-            (idx, cols) => this.predicate(idx, cols) && predicate(idx, cols)
+            this.predicate.and(predicate)
         );
     }
 }
diff --git a/js/src/dataframe/predicate.ts b/js/src/dataframe/predicate.ts
new file mode 100644
index 0000000000000..4438c0adbaa98
--- /dev/null
+++ b/js/src/dataframe/predicate.ts
@@ -0,0 +1,171 @@
+import { Vector } from "../vector/vector";
+
+export type ValueFunc<T> = (idx: number, cols: Vector[]) => T|null;
+export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
+
+export abstract class Value<T> {
+    eq(other: Value<T>|T): Predicate {
+        if (!(other instanceof Value)) other = new Literal(other);
+        return new Equals(this, other);
+    }
+    lteq(other: Value<T>|T): Predicate {
+        if (!(other instanceof Value)) other = new Literal(other);
+        return new LTeq(this, other);
+    }
+    gteq(other: Value<T>|T): Predicate {
+        if (!(other instanceof Value)) other = new Literal(other);
+        return new GTeq(this, other);
+    }
+}
+
+class Literal<T=any> extends Value<T> {
+    constructor(public v: T) { super(); }
+}
+
+class Col<T=any> extends Value<T> {
+    vector: Vector<T>;  // vector chosen by the most recent bind()
+    colidx: number;     // cached position of `name`; stays undefined until a bind() succeeds
+
+    constructor(public name: string) { super(); }
+    // Resolve this column against a batch and return its bound `get` accessor.
+    // Throws if no vector in `cols` is named `this.name`.
+    bind(cols: Vector[]) {
+        // Test for undefined, not truthiness: index 0 is a valid cached result
+        // and must not force a fresh name lookup on every batch.
+        if (this.colidx === undefined) {
+            // Assume column index doesn't change between calls to bind
+            let found = -1;
+            for (let idx = -1; ++idx < cols.length;) {
+                if (cols[idx].name === this.name) { found = idx; break; }
+            }
+            if (found < 0) throw new Error(`Failed to bind Col "${this.name}"`)
+            this.colidx = found;  // only cache a successful lookup
+        }
+        this.vector = cols[this.colidx]
+        return this.vector.get.bind(this.vector);
+    }
+    emitString() { return `cols[${this.colidx}].get(idx)`; }
+}
+
+export abstract class Predicate {
+    abstract bind(cols: Vector[]): PredicateFunc;
+    and(expr: Predicate): Predicate { return new And(this, expr); }
+    or(expr: Predicate): Predicate { return new Or(this, expr); }
+    ands(): Predicate[] { return [this]; }
+}
+
+abstract class ComparisonPredicate<T=any> extends Predicate {
+    constructor(public readonly left: Value<T>, public readonly right: Value<T>) {
+        super();
+    }
+
+    bind(cols: Vector<any>[]) {
+        if (this.left instanceof Literal) {
+            if (this.right instanceof Literal) {
+                return this._bindLitLit(cols, this.left, this.right);
+            } else { // right is a Col
+                // NOTE: the operands trade places here: the left-hand literal is passed as _bindColLit's last argument
+                return this._bindColLit(cols, this.right as Col, this.left);
+            }
+        } else { // left is a Col
+            if (this.right instanceof Literal) {
+                return this._bindColLit(cols, this.left as Col, this.right);
+            } else { // right is a Col
+                return this._bindColCol(cols, this.left as Col, this.right as Col);
+            }
+        }
+    }
+
+    protected abstract _bindLitLit(cols: Vector<any>[], left: Literal, right: Literal): PredicateFunc;
+    protected abstract _bindColCol(cols: Vector<any>[], left: Col    , right: Col    ): PredicateFunc;
+    protected abstract _bindColLit(cols: Vector<any>[], col: Col     , lit: Literal  ): PredicateFunc;
+}
+
+abstract class CombinationPredicate extends Predicate {
+    constructor(public readonly left: Predicate, public readonly right: Predicate) {
+        super();
+    }
+}
+
+class And extends CombinationPredicate {
+    bind(cols: Vector[]) {
+        const left = this.left.bind(cols);
+        const right = this.right.bind(cols);
+        return (idx: number, cols: Vector[]) => left(idx, cols) && right(idx, cols);
+    }
+    ands() : Predicate[] { return this.left.ands().concat(this.right.ands()); }
+}
+
+class Or extends CombinationPredicate {
+    bind(cols: Vector[]) {
+        const left = this.left.bind(cols);
+        const right = this.right.bind(cols);
+        return (idx: number, cols: Vector[]) => left(idx, cols) || right(idx, cols);
+    }
+}
+
+class Equals extends ComparisonPredicate {
+    protected _bindLitLit(_: Vector<any>[], left: Literal, right: Literal): PredicateFunc {
+        const rtrn: boolean = left.v == right.v;
+        return () => rtrn;
+    }
+
+    protected _bindColCol(cols: Vector<any>[], left: Col    , right: Col    ): PredicateFunc {
+        const left_func = left.bind(cols);
+        const right_func = right.bind(cols);
+        return (idx: number, cols: Vector[]) => left_func(idx, cols) == right_func(idx, cols);
+    }
+
+    protected _bindColLit(cols: Vector<any>[], col: Col     , lit: Literal  ): PredicateFunc {
+        const col_func = col.bind(cols);
+        return (idx: number, cols: Vector[]) => col_func(idx, cols) == lit.v;
+    }
+}
+
+class LTeq extends ComparisonPredicate {
+    protected _bindLitLit(_: Vector<any>[], left: Literal, right: Literal): PredicateFunc {
+        const rtrn: boolean = left.v <= right.v;
+        return () => rtrn;
+    }
+    protected _bindColCol(cols: Vector<any>[], left: Col    , right: Col    ): PredicateFunc {
+        const left_func = left.bind(cols);
+        const right_func = right.bind(cols);
+        return (idx: number, cols: Vector[]) => left_func(idx, cols) <= right_func(idx, cols);
+    }
+    protected _bindColLit(cols: Vector<any>[], col: Col     , lit: Literal  ): PredicateFunc {
+        const col_func = col.bind(cols);
+        // bind() hands over the literal second even when it is the left operand (lit <= col)
+        if (this.left === lit) return (idx: number, cols: Vector[]) => lit.v <= col_func(idx, cols);
+        return (idx: number, cols: Vector[]) => col_func(idx, cols) <= lit.v;
+    }
+}
+
+class GTeq extends ComparisonPredicate {
+    // Both operands are constant, so the comparison is evaluated once at bind time.
+    protected _bindLitLit(_: Vector<any>[], left: Literal, right: Literal): PredicateFunc {
+        const rtrn: boolean = left.v >= right.v;
+        return () => rtrn;
+    }
+
+    // Compares the two columns' values row by row.
+    protected _bindColCol(cols: Vector<any>[], left: Col, right: Col): PredicateFunc {
+        const left_func = left.bind(cols);
+        const right_func = right.bind(cols);
+        return (idx: number, cols: Vector[]) => left_func(idx, cols) >= right_func(idx, cols);
+    }
+
+    // ComparisonPredicate.bind() always passes the literal as `lit`, even when it
+    // was written as the left operand, so `this.left === lit` identifies that
+    // case and the operands are put back in their original order: `lit >= col`
+    // must not be evaluated as `col >= lit`.
+    protected _bindColLit(cols: Vector<any>[], col: Col, lit: Literal): PredicateFunc {
+        const col_func = col.bind(cols);
+        if (this.left === lit) {
+            return (idx: number, cols: Vector[]) => lit.v >= col_func(idx, cols);
+        }
+        return (idx: number, cols: Vector[]) => col_func(idx, cols) >= lit.v;
+    }
+}
+
+export function lit(n: number): Value<any> { return new Literal(n); }
+export function col(n: string): Value<any> { return new Col(n); }

From aa999f87f16456145c29ac7c3ea21e13a9bdf11b Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Wed, 10 Jan 2018 14:38:49 -0500
Subject: [PATCH 06/19] Add DictionaryVector optimization for equals predicate

---
 js/perf/table_config.js       |  2 +-
 js/src/dataframe/predicate.ts | 26 +++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/js/perf/table_config.js b/js/perf/table_config.js
index 06c9198353b80..7bface6d2cdde 100644
--- a/js/perf/table_config.js
+++ b/js/perf/table_config.js
@@ -25,7 +25,7 @@ const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.a
 tests = [
     {col: 0, test: 'gteq', value: 0        },
     {col: 1, test: 'gteq', value: 0        },
-    //{col: 2, test:   'eq', value: 'Seattle'},
+    {col: 2, test:   'eq', value: 'Seattle'},
 ]
 
 for (const filename of filenames) {
diff --git a/js/src/dataframe/predicate.ts b/js/src/dataframe/predicate.ts
index 4438c0adbaa98..263b8646d71fc 100644
--- a/js/src/dataframe/predicate.ts
+++ b/js/src/dataframe/predicate.ts
@@ -1,4 +1,5 @@
 import { Vector } from "../vector/vector";
+import { DictionaryVector } from "../vector/dictionary";
 
 export type ValueFunc<T> = (idx: number, cols: Vector[]) => T|null;
 export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
@@ -118,7 +119,30 @@ class Equals extends ComparisonPredicate {
 
     protected _bindColLit(cols: Vector<any>[], col: Col     , lit: Literal  ): PredicateFunc {
         const col_func = col.bind(cols);
-        return (idx: number, cols: Vector[]) => col_func(idx, cols) == lit.v;
+        if (col.vector instanceof DictionaryVector) {
+            // Assume that there is only one key with the value `lit.v`
+            let key = -1
+            for (; ++key < col.vector.data.length;) {
+                if (col.vector.data.get(key) === lit.v) {
+                    break;
+                }
+            }
+
+            if (key == col.vector.data.length) {
+                // the value doesn't exist in the dictionary - always return
+                // false
+                // TODO: special-case of PredicateFunc that encapsulates this
+                // "always false" behavior. That way filtering operations don't
+                // have to bother checking
+                return () => false;
+            } else {
+                return (idx: number) => {
+                    return (col.vector as DictionaryVector<any>).getKey(idx) === key;
+                }
+            }
+        } else {
+            return (idx: number, cols: Vector[]) => col_func(idx, cols) == lit.v;
+        }
     }
 }
 

From 2744c63c7e699a2acead253d8b903bbe70409738 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Wed, 10 Jan 2018 16:20:44 -0500
Subject: [PATCH 07/19] Remove Chunked/Simple DataFrame distinction

---
 js/src/dataframe/dataframe.ts | 149 ++++++++++++++--------------------
 1 file changed, 61 insertions(+), 88 deletions(-)

diff --git a/js/src/dataframe/dataframe.ts b/js/src/dataframe/dataframe.ts
index 0dbb19bbbdd4e..88923effd6f77 100644
--- a/js/src/dataframe/dataframe.ts
+++ b/js/src/dataframe/dataframe.ts
@@ -6,81 +6,25 @@ import { Predicate } from "./predicate"
 
 export type NextFunc = (idx: number, cols: Vector[]) => void;
 
-export abstract class DataFrame {
-    constructor(readonly lengths: Uint32Array) {}
-    public abstract columns: Vector<any>[];
-    public abstract getBatch(batch: number): Vector[];
-    public abstract scan(next: NextFunc): void;
-    public filter(predicate: Predicate): DataFrame {
-        return new FilteredDataFrame(this, predicate);
-    }
-
-    static from(table: Vector<any>): DataFrame {
-        // There are two types of Vectors we might want to make into
-        // a ChunkedDataFrame:
-        //   1) a StructVector of all VirtualVectors
-        //   2) a VirtualVector of all StructVectors
-        if (table instanceof StructVector) {
-            if (table.columns.every((col) => col instanceof VirtualVector)) {
-                // ChunkedDataFrame case (1)
-                return new ChunkedDataFrame(table.columns as VirtualVector<any>[]);
-            } else {
-                return new SimpleDataFrame(table.columns)
-            }
-        } else if (table instanceof VirtualVector &&
-                   table.vectors.every((vec) => vec instanceof StructVector)) {
-            const structs = table.vectors as StructVector<any>[];
-            const rest: StructVector<any>[] = structs.slice(1);
-            const virtuals: VirtualVector<any>[] = structs[0].columns.map((vec, col_idx) => {
-                return vec.concat(...rest.map((vec) => vec.columns[col_idx]));
-            }) as VirtualVector<any>[];
-            // ChunkedDataFrame case (2)
-            return new ChunkedDataFrame(virtuals);
-        } else {
-            return new SimpleDataFrame([table]);
-        }
-    }
-
-    count(): number {
-        return this.lengths.reduce((acc, val) => acc + val);
-    }
-}
-
-class SimpleDataFrame extends DataFrame {
+export class DataFrame {
     readonly lengths: Uint32Array;
-    constructor(public columns: Vector<any>[]) {
-        super(new Uint32Array([0, columns[0].length]));
-        if (!this.columns.slice(1).every((v) => v.length === this.columns[0].length)) {
-            throw new Error("Attempted to create a DataFrame with un-aligned vectors");
-        }
-    }
-
-    public getBatch() {
-        return this.columns;
-    }
-
-    public scan(next: NextFunc) {
-        for (let idx = -1; ++idx < this.lengths[1];) {
-            next(idx, this.columns)
-        }
-    }
-
-    *[Symbol.iterator]() {
-        for (let idx = -1; ++idx < this.lengths[1];) {
-            yield idx;
-        }
-    }
-}
-
-class ChunkedDataFrame extends DataFrame {
     public columns: Vector<any>[];
-    constructor(private virtuals: VirtualVector<any>[]) {
-        super(ChunkedDataFrame.getLengths(virtuals));
-        this.virtuals = virtuals;
+    constructor(readonly batches: Vector<any>[][]) {
+        // for each batch
+        this.lengths = new Uint32Array(batches.map((batch)=>{
+            // verify that every vector has the same length, and return that
+            // length
+            // throw an error if the lengths don't match
+            return batch.reduce((length, col) => {
+                if (col.length !== length)
+                    throw new Error("Attempted to create a DataFrame with un-aligned vectors");
+                return length;
+            }, batch[0].length);
+        }));
     }
 
-    getBatch(batch: number): Vector[] {
-        return this.virtuals.map((virt) => virt.vectors[batch]);
+    public filter(predicate: Predicate): DataFrame {
+        return new FilteredDataFrame(this, predicate);
     }
 
     scan(next: NextFunc) {
@@ -88,7 +32,7 @@ class ChunkedDataFrame extends DataFrame {
             const length = this.lengths[batch];
 
             // load batches
-            const columns = this.getBatch(batch);
+            const columns = this.batches[batch];
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
@@ -97,12 +41,16 @@ class ChunkedDataFrame extends DataFrame {
         }
     }
 
+    count(): number {
+        return this.lengths.reduce((acc, val) => acc + val, 0);  // seeded: a frame with no batches counts 0 instead of throwing
+    }
+
     *[Symbol.iterator]() {
         for (let batch = -1; ++batch < this.lengths.length;) {
             const length = this.lengths[batch];
 
             // load batches
-            this.columns = this.getBatch(batch);
+            this.columns = this.batches[batch];
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
@@ -111,34 +59,48 @@ class ChunkedDataFrame extends DataFrame {
         }
     }
 
-    private static getLengths(virtuals: VirtualVector<any>[]): Uint32Array {
-        if (!virtuals.slice(1).every((v) => v.aligned(virtuals[0]))) {
-            throw new Error("Attempted to create a DataFrame with un-aligned vectors");
+    static from(table: Vector<any>): DataFrame {
+        if (table instanceof StructVector) {
+            const columns = table.columns;
+            if (isAligned(columns)) {
+                // StructVector of aligned VirtualVectors
+                // break up VirtualVectors into batches
+                const batches = columns[0].vectors.map((_,i) => {
+                    return columns.map((vec: VirtualVector<any>) => {
+                            return vec.vectors[i];
+                        });
+                });
+                return new DataFrame(batches);
+            } else {
+                return new DataFrame([columns]);
+            }
+        } else if (table instanceof VirtualVector &&
+                   table.vectors.every((vec) => vec instanceof StructVector)) {
+            return new DataFrame(table.vectors.map((vec) => {
+                return (vec as StructVector<any>).columns;
+            }));
+        } else {
+            return new DataFrame([[table]]);
         }
-        return new Uint32Array(virtuals[0].vectors.map((v)=>v.length));
     }
 }
 
 class FilteredDataFrame extends DataFrame {
     public columns: Vector<any>[];
     constructor (readonly parent: DataFrame, private predicate: Predicate) {
-        super(parent.lengths);
+        super(parent.batches);
     }
 
-    getBatch(batch: number): Vector[] {
-        return this.parent.getBatch(batch);
-    };
-
     scan(next: NextFunc) {
         // inlined version of this:
         // this.parent.scan((idx, columns) => {
         //     if (this.predicate(idx, columns)) next(idx, columns);
         // });
-        for (let batch = -1; ++batch < this.parent.lengths.length;) {
-            const length = this.parent.lengths[batch];
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
 
             // load batches
-            const columns = this.parent.getBatch(batch);
+            const columns = this.batches[batch];
             const predicate = this.predicate.bind(columns);
 
             // yield all indices
@@ -156,11 +118,11 @@ class FilteredDataFrame extends DataFrame {
         // });
         // return sum;
         let sum = 0;
-        for (let batch = -1; ++batch < this.parent.lengths.length;) {
-            const length = this.parent.lengths[batch];
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
 
             // load batches
-            const columns = this.parent.getBatch(batch);
+            const columns = this.batches[batch];
             const predicate = this.predicate.bind(columns);
 
             // yield all indices
@@ -178,3 +140,14 @@ class FilteredDataFrame extends DataFrame {
         );
     }
 }
+
+function isAligned(columns: Vector[]): columns is VirtualVector<any>[] {
+    if (columns.every((col) => col instanceof VirtualVector)) {
+        const virtuals = columns as VirtualVector<any>[]
+
+        return virtuals.slice(1).every((col) => {
+            return col.aligned(virtuals[0]);
+        });
+    }
+    return false;
+}

From 6a41d6872c3fe47b0413c1266bdcf9339aa8bf73 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Thu, 11 Jan 2018 15:11:09 -0500
Subject: [PATCH 08/19] clean up table benchmarks

---
 js/package.json                       |   1 +
 js/perf/index.js                      | 199 +++++---------------------
 js/perf/table_config.js               |  20 ++-
 js/src/Arrow.externs.ts               |  16 +++
 js/{ => test/data/tables}/generate.py |   0
 5 files changed, 63 insertions(+), 173 deletions(-)
 rename js/{ => test/data/tables}/generate.py (100%)

diff --git a/js/package.json b/js/package.json
index d68e7a6279e61..1f59ac1ef98d2 100644
--- a/js/package.json
+++ b/js/package.json
@@ -12,6 +12,7 @@
     "clean": "gulp clean",
     "debug": "gulp debug",
     "perf": "node ./perf/index.js",
+    "create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow",
     "release": "./npm-release.sh",
     "clean:all": "run-p clean clean:testdata",
     "clean:testdata": "gulp clean:testdata",
diff --git a/js/perf/index.js b/js/perf/index.js
index b5789e8b34c07..5ab3e76b24d7a 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -16,54 +16,42 @@
 // under the License.
 
 // Use the ES5 UMD target as perf baseline
-// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
-// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
-// const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
-const { lit, col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
+const { col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
+// const { col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
+// const { col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
+// const { col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
 
 const config = require('./config');
 const Benchmark = require('benchmark');
 
 const suites = [];
 
-//for (let { name, buffers} of config) {
-//    const parseSuite = new Benchmark.Suite(`Parse "${name}"`, { async: true });
-//    const sliceSuite = new Benchmark.Suite(`Slice "${name}" vectors`, { async: true });
-//    const iterateSuite = new Benchmark.Suite(`Iterate "${name}" vectors`, { async: true });
-//    const getByIndexSuite = new Benchmark.Suite(`Get "${name}" values by index`, { async: true });
-//    parseSuite.add(createFromTableTest(name, buffers));
-//    parseSuite.add(createReadVectorsTest(name, buffers));
-//    for (const vector of Table.from(buffers).columns) {
-//        sliceSuite.add(createSliceTest(vector));
-//        iterateSuite.add(createIterateTest(vector));
-//        getByIndexSuite.add(createGetByIndexTest(vector));
-//    }
-//    suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
-//}
+for (let { name, buffers} of config) {
+    const parseSuite = new Benchmark.Suite(`Parse "${name}"`, { async: true });
+    const sliceSuite = new Benchmark.Suite(`Slice "${name}" vectors`, { async: true });
+    const iterateSuite = new Benchmark.Suite(`Iterate "${name}" vectors`, { async: true });
+    const getByIndexSuite = new Benchmark.Suite(`Get "${name}" values by index`, { async: true });
+    parseSuite.add(createFromTableTest(name, buffers));
+    parseSuite.add(createReadVectorsTest(name, buffers));
+    for (const vector of Table.from(buffers).columns) {
+        sliceSuite.add(createSliceTest(vector));
+        iterateSuite.add(createIterateTest(vector));
+        getByIndexSuite.add(createGetByIndexTest(vector));
+    }
+    suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
+}
 
 for (let {name, buffers, tests} of require('./table_config')) {
-    const tableIteratorSuite = new Benchmark.Suite(`Table Iterator "${name}"`, { async: true });
-    const tableCountSuite = new Benchmark.Suite(`Table Count "${name}"`, { async: true });
-    const dfIteratorSuite = new Benchmark.Suite(`DataFrame Iterator "${name}"`, { async: true });
-    const dfIteratorCountSuite = new Benchmark.Suite(`DataFrame Iterator Count "${name}"`, { async: true });
+    const dfFilterCountSuite = new Benchmark.Suite(`DataFrame Filter-Scan Count "${name}"`, { async: true });
     const dfDirectCountSuite = new Benchmark.Suite(`DataFrame Direct Count "${name}"`, { async: true });
-    const dfScanCountSuite = new Benchmark.Suite(`DataFrame Scan Count "${name}"`, { async: true });
-    const dfFilterCountSuite = new Benchmark.Suite(`DataFrame Filter Scan Count "${name}"`, { async: true });
-    const vectorCountSuite = new Benchmark.Suite(`Vector Count "${name}"`, { async: true });
     const table = Table.from(buffers);
 
-    tableIteratorSuite.add(createTableIteratorTest(table));
-    dfIteratorSuite.add(createDataFrameIteratorTest(table));
     for (test of tests) {
-        tableCountSuite.add(createTableCountTest(table, test.col, test.test, test.value))
-        dfIteratorCountSuite.add(createDataFrameIteratorCountTest(table, test.col, test.test, test.value))
-        dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value))
-        dfScanCountSuite.add(createDataFrameScanCountTest(table, test.col, test.test, test.value))
         dfFilterCountSuite.add(createDataFrameFilterCountTest(table, test.col, test.test, test.value))
-        vectorCountSuite.add(createVectorCountTest(table.columns[test.col], test.test, test.value))
+        dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value))
     }
 
-    suites.push(tableIteratorSuite, tableCountSuite, dfIteratorSuite, dfIteratorCountSuite, dfDirectCountSuite, dfScanCountSuite, dfFilterCountSuite, vectorCountSuite)
+    suites.push(dfFilterCountSuite, dfDirectCountSuite)
 }
 
 console.log('Running apache-arrow performance tests...\n');
@@ -135,81 +123,9 @@ function createGetByIndexTest(vector) {
     };
 }
 
-function createVectorCountTest(vector, test, value) {
-    let op;
-    if (test == 'gteq') {
-        op = function () {
-            sum = 0;
-            for (cell of vector) {
-                sum += (cell >= value)
-            }
-        }
-    } else if (test == 'eq') {
-        op = function () {
-            sum = 0;
-            for (cell of vector) {
-                sum += (cell == value)
-            }
-        }
-    } else {
-        throw new Error(`Unrecognized test "$test"`);
-    }
-
-    return {
-        async: true,
-        name: `name: '${vector.name}', length: ${vector.length}, type: ${vector.type}, test: ${test}, value: ${value}`,
-        fn: op
-    };
-}
-
-function createTableIteratorTest(table) {
-    let row;
-    return {
-        async: true,
-        name: `length: ${table.length}`,
-        fn() { for (row of table) {} }
-    };
-}
-
-function createTableCountTest(table, column, test, value) {
-    let op;
-    if (test == 'gteq') {
-        op = function () {
-            sum = 0;
-            for (row of table) {
-                sum += (row.get(column) >= value)
-            }
-        }
-    } else if (test == 'eq') {
-        op = function() {
-            sum = 0;
-            for (row of table) {
-                sum += (row.get(column) == value)
-            }
-        }
-    } else {
-        throw new Error(`Unrecognized test "${test}"`);
-    }
-
-    return {
-        async: true,
-        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
-        fn: op
-    };
-}
-
-function createDataFrameIteratorTest(table) {
-    let df = DataFrame.from(table);
-    let idx;
-    return {
-        async: true,
-        name: `length: ${table.length}`,
-        fn() { for (idx of table) {} }
-    };
-}
-
 function createDataFrameDirectCountTest(table, column, test, value) {
     let df = DataFrame.from(table);
+    let colidx = table.columns.findIndex((c)=>c.name === column);
 
     if (test == 'gteq') {
         op = function () {
@@ -218,11 +134,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
                 const length = df.lengths[batch];
 
                 // load batches
-                const columns = df.getBatch(batch);
+                const columns = df.batches[batch];
 
                 // yield all indices
                 for (let idx = -1; ++idx < length;) {
-                    sum += (columns[column].get(idx) >= value);
+                    sum += (columns[colidx].get(idx) >= value);
                 }
             }
         }
@@ -233,11 +149,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
                 const length = df.lengths[batch];
 
                 // load batches
-                const columns = df.getBatch(batch);
+                const columns = df.batches[batch]
 
                 // yield all indices
                 for (let idx = -1; ++idx < length;) {
-                    sum += (columns[column].get(idx) == value);
+                    sum += (columns[colidx].get(idx) == value);
                 }
             }
         }
@@ -247,79 +163,28 @@ function createDataFrameDirectCountTest(table, column, test, value) {
 
     return {
         async: true,
-        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
-        fn: op
-    };
-}
-
-function createDataFrameScanCountTest(table, column, test, value) {
-    let df = DataFrame.from(table);
-
-    if (test == 'gteq') {
-        op = function () {
-            sum = 0;
-            df.scan((idx, cols)=>{sum += cols[column].get(idx) >= value});
-        }
-    } else if (test == 'eq') {
-        op = function() {
-            sum = 0;
-            df.scan((idx, cols)=>{sum += cols[column].get(idx) == value});
-            console.log(sum);
-        }
-    } else {
-        throw new Error(`Unrecognized test "${test}"`);
-    }
-
-    return {
-        async: true,
-        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
+        name: `name: '${column}', length: ${table.length}, type: ${table.columns[colidx].type}, test: ${test}, value: ${value}`,
         fn: op
     };
 }
 
 function createDataFrameFilterCountTest(table, column, test, value) {
     let df = DataFrame.from(table);
+    let colidx = table.columns.findIndex((c)=>c.name === column);
+
     if (test == 'gteq') {
-        df = df.filter(col(table.columns[column].name).gteq(value));
+        df = df.filter(col(column).gteq(value));
     } else if (test == 'eq') {
-        df = df.filter(col(table.columns[column].name).eq(value));
+        df = df.filter(col(column).eq(value));
     } else {
         throw new Error(`Unrecognized test "${test}"`);
     }
 
     return {
         async: true,
-        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
+        name: `name: '${column}', length: ${table.length}, type: ${table.columns[colidx].type}, test: ${test}, value: ${value}`,
         fn() {
             df.count();
         }
     };
 }
-
-function createDataFrameIteratorCountTest(table, column, test, value) {
-    let df = DataFrame.from(table);
-
-    if (test == 'gteq') {
-        op = function () {
-            sum = 0;
-            for (idx of df) {
-                sum += (df.columns[column].get(idx) >= value);
-            }
-        }
-    } else if (test == 'eq') {
-        op = function() {
-            sum = 0;
-            for (idx of df) {
-                sum += (df.columns[column].get(idx) == value);
-            }
-        }
-    } else {
-        throw new Error(`Unrecognized test "${test}"`);
-    }
-
-    return {
-        async: true,
-        name: `name: '${table.columns[column].name}', length: ${table.length}, type: ${table.columns[column].type}, test: ${test}, value: ${value}`,
-        fn: op
-    };
-}
diff --git a/js/perf/table_config.js b/js/perf/table_config.js
index 7bface6d2cdde..3c045e4571e44 100644
--- a/js/perf/table_config.js
+++ b/js/perf/table_config.js
@@ -22,15 +22,23 @@ const glob = require('glob');
 const config = [];
 const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
 
-tests = [
-    {col: 0, test: 'gteq', value: 0        },
-    {col: 1, test: 'gteq', value: 0        },
-    {col: 2, test:   'eq', value: 'Seattle'},
-]
+tests = {
+    "tracks": [
+        {col: 'lat',    test: 'gteq', value: 0        },
+        {col: 'lng',    test: 'gteq', value: 0        },
+        {col: 'origin', test:   'eq', value: 'Seattle'},
+    ]
+}
 
 for (const filename of filenames) {
     const { name } = path.parse(filename);
-    config.push({ name, buffers: [fs.readFileSync(filename)], tests });
+    if (name in tests) {
+        config.push({
+            name,
+            buffers: [fs.readFileSync(filename)],
+            tests: tests[name]
+        });
+    }
 }
 
 module.exports = config;
diff --git a/js/src/Arrow.externs.ts b/js/src/Arrow.externs.ts
index c23930271183d..d3bfdbbf8e123 100644
--- a/js/src/Arrow.externs.ts
+++ b/js/src/Arrow.externs.ts
@@ -82,3 +82,19 @@ let DictionaryVector = function() {};
 DictionaryVector.prototype.getKey;
 /** @type {?} */
 DictionaryVector.prototype.getValue;
+
+let DataFrame = function () {};
+/** @type {?} */
+DataFrame.prototype.lengths;
+/** @type {?} */
+DataFrame.prototype.columns;
+/** @type {?} */
+DataFrame.prototype.batches;
+
+let Col = function() {};
+/** @type {?} */
+Col.prototype.gteq;
+/** @type {?} */
+Col.prototype.lteq;
+/** @type {?} */
+Col.prototype.eq;
diff --git a/js/generate.py b/js/test/data/tables/generate.py
similarity index 100%
rename from js/generate.py
rename to js/test/data/tables/generate.py

From e8979ba5e44bfc3e66befec8a74368b6d3a08416 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Thu, 11 Jan 2018 15:12:18 -0500
Subject: [PATCH 09/19] Refactor DataFrame to extend Vector<StructRow>

---
 js/src/dataframe/dataframe.ts | 50 ++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/js/src/dataframe/dataframe.ts b/js/src/dataframe/dataframe.ts
index 88923effd6f77..c8db286f28eb4 100644
--- a/js/src/dataframe/dataframe.ts
+++ b/js/src/dataframe/dataframe.ts
@@ -1,15 +1,29 @@
 import { Vector } from "../vector/vector";
-import { StructVector } from "../vector/struct";
+import { StructVector, StructRow } from "../vector/struct";
 import { VirtualVector } from "../vector/virtual";
 
 import { Predicate } from "./predicate"
 
 export type NextFunc = (idx: number, cols: Vector[]) => void;
 
-export class DataFrame {
+export class DataFrameRow extends StructRow<any> {
+    constructor (batches: Vector[], idx: number) {
+        super(new StructVector({columns: batches}), idx);
+    }
+}
+
+export interface DataFrameOps {
+    readonly batches: Vector[][];
+    readonly lengths: Uint32Array;
+    filter(predicate: Predicate): DataFrameOps;
+    scan(next: NextFunc): void;
+    count(): number;
+}
+
+export class DataFrame extends Vector<DataFrameRow> implements DataFrameOps {
     readonly lengths: Uint32Array;
-    public columns: Vector<any>[];
-    constructor(readonly batches: Vector<any>[][]) {
+    constructor(readonly batches: Vector[][]) {
+        super();
         // for each batch
         this.lengths = new Uint32Array(batches.map((batch)=>{
             // verify that every vector has the same length, and return that
@@ -23,7 +37,17 @@ export class DataFrame {
         }));
     }
 
-    public filter(predicate: Predicate): DataFrame {
+    get(idx: number): DataFrameRow|null {
+        let batch = 0;
+        while (idx > this.lengths[batch] && batch < this.lengths.length)
+            idx -= this.lengths[batch++];
+
+        if (batch === this.lengths.length) return null;
+
+        else return new DataFrameRow(this.batches[batch], idx);
+    }
+
+    filter(predicate: Predicate): DataFrameOps {
         return new FilteredDataFrame(this, predicate);
     }
 
@@ -50,11 +74,11 @@ export class DataFrame {
             const length = this.lengths[batch];
 
             // load batches
-            this.columns = this.batches[batch];
+            const columns = this.batches[batch];
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                yield idx;
+                yield new DataFrameRow(columns, idx);
             }
         }
     }
@@ -85,10 +109,12 @@ export class DataFrame {
     }
 }
 
-class FilteredDataFrame extends DataFrame {
-    public columns: Vector<any>[];
-    constructor (readonly parent: DataFrame, private predicate: Predicate) {
-        super(parent.batches);
+class FilteredDataFrame implements DataFrameOps {
+    readonly lengths: Uint32Array;
+    readonly batches: Vector[][];
+    constructor (readonly parent: DataFrameOps, private predicate: Predicate) {
+        this.batches = parent.batches;
+        this.lengths = parent.lengths;
     }
 
     scan(next: NextFunc) {
@@ -133,7 +159,7 @@ class FilteredDataFrame extends DataFrame {
         return sum;
     }
 
-    filter(predicate: Predicate): DataFrame {
+    filter(predicate: Predicate): DataFrameOps {
         return new FilteredDataFrame(
             this.parent,
             this.predicate.and(predicate)

From 1d60aa1436e81d5d67b3b16a8f6f2c5df7e57189 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Thu, 11 Jan 2018 16:07:55 -0500
Subject: [PATCH 10/19] Moved DataFrame ops to Table. DataFrame is now an
 interface

---
 js/perf/index.js                          |  19 ++-
 js/src/Arrow.externs.ts                   |  12 +-
 js/src/Arrow.ts                           |   8 +-
 js/src/bin/arrow2csv.ts                   |   2 +-
 js/src/dataframe/dataframe.ts             | 179 ----------------------
 js/src/{dataframe => vector}/predicate.ts |   0
 js/src/vector/table.ts                    | 164 +++++++++++++++++---
 7 files changed, 158 insertions(+), 226 deletions(-)
 delete mode 100644 js/src/dataframe/dataframe.ts
 rename js/src/{dataframe => vector}/predicate.ts (100%)

diff --git a/js/perf/index.js b/js/perf/index.js
index 5ab3e76b24d7a..9527a8e842c5a 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -124,17 +124,16 @@ function createGetByIndexTest(vector) {
 }
 
 function createDataFrameDirectCountTest(table, column, test, value) {
-    let df = DataFrame.from(table);
     let colidx = table.columns.findIndex((c)=>c.name === column);
 
     if (test == 'gteq') {
         op = function () {
             sum = 0;
-            for (let batch = -1; ++batch < df.lengths.length;) {
-                const length = df.lengths[batch];
+            for (let batch = -1; ++batch < table.lengths.length;) {
+                const length = table.lengths[batch];
 
                 // load batches
-                const columns = df.batches[batch];
+                const columns = table.batches[batch];
 
                 // yield all indices
                 for (let idx = -1; ++idx < length;) {
@@ -145,11 +144,11 @@ function createDataFrameDirectCountTest(table, column, test, value) {
     } else if (test == 'eq') {
         op = function() {
             sum = 0;
-            for (let batch = -1; ++batch < df.lengths.length;) {
-                const length = df.lengths[batch];
+            for (let batch = -1; ++batch < table.lengths.length;) {
+                const length = table.lengths[batch];
 
                 // load batches
-                const columns = df.batches[batch]
+                const columns = table.batches[batch]
 
                 // yield all indices
                 for (let idx = -1; ++idx < length;) {
@@ -169,13 +168,13 @@ function createDataFrameDirectCountTest(table, column, test, value) {
 }
 
 function createDataFrameFilterCountTest(table, column, test, value) {
-    let df = DataFrame.from(table);
     let colidx = table.columns.findIndex((c)=>c.name === column);
+    let df;
 
     if (test == 'gteq') {
-        df = df.filter(col(column).gteq(value));
+        df = table.filter(col(column).gteq(value));
     } else if (test == 'eq') {
-        df = df.filter(col(column).eq(value));
+        df = table.filter(col(column).eq(value));
     } else {
         throw new Error(`Unrecognized test "${test}"`);
     }
diff --git a/js/src/Arrow.externs.ts b/js/src/Arrow.externs.ts
index d3bfdbbf8e123..0685d262cc186 100644
--- a/js/src/Arrow.externs.ts
+++ b/js/src/Arrow.externs.ts
@@ -50,6 +50,10 @@ Table.prototype.key;
 Table.prototype.select;
 /** @type {?} */
 Table.prototype.toString;
+/** @type {?} */
+Table.prototype.lengths;
+/** @type {?} */
+Table.prototype.batches;
 
 let Vector = function() {};
 /** @type {?} */
@@ -83,14 +87,6 @@ DictionaryVector.prototype.getKey;
 /** @type {?} */
 DictionaryVector.prototype.getValue;
 
-let DataFrame = function () {};
-/** @type {?} */
-DataFrame.prototype.lengths;
-/** @type {?} */
-DataFrame.prototype.columns;
-/** @type {?} */
-DataFrame.prototype.batches;
-
 let Col = function() {};
 /** @type {?} */
 Col.prototype.gteq;
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index ce7235b8b13d4..d80cfed4864f8 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -45,8 +45,7 @@ import {
     TimestampVector,
 } from './vector/numeric';
 
-import { DataFrame } from './dataframe/dataframe';
-import { lit, col } from './dataframe/predicate';
+import { lit, col } from './vector/predicate';
 
 // closure compiler always erases static method names:
 // https://github.com/google/closure-compiler/issues/1776
@@ -54,7 +53,6 @@ import { lit, col } from './dataframe/predicate';
 Table['from'] = Table.from;
 Table['fromAsync'] = Table.fromAsync;
 BoolVector['pack'] = BoolVector.pack;
-DataFrame['from'] = DataFrame.from;
 
 export { read, readAsync };
 export { Table, Vector, StructRow };
@@ -88,8 +86,7 @@ export {
     FixedSizeListVector,
 };
 
-export { DataFrame } from './dataframe/dataframe';
-export { lit, col } from './dataframe/predicate';
+export { lit, col } from './vector/predicate';
 
 
 /* These exports are needed for the closure umd targets */
@@ -103,7 +100,6 @@ try {
         Arrow['readAsync'] = readAsync;
         Arrow['Table'] = Table;
         Arrow['Vector'] = Vector;
-        Arrow['DataFrame'] = DataFrame;
         Arrow['StructRow'] = StructRow;
         Arrow['BoolVector'] = BoolVector;
         Arrow['ListVector'] = ListVector;
diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
index 01ef0b848ce75..117d417f1b4fa 100644
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -97,7 +97,7 @@ files.forEach((source) => {
     printTable(table);
 });
 
-function printTable(table: Arrow.Table<any>) {
+function printTable(table: Arrow.Table) {
     let header = [...table.columns.map((_, i) => table.key(i))].map(stringify);
     let maxColumnWidths = header.map(x => x.length);
     // Pass one to convert to strings and count max column widths
diff --git a/js/src/dataframe/dataframe.ts b/js/src/dataframe/dataframe.ts
deleted file mode 100644
index c8db286f28eb4..0000000000000
--- a/js/src/dataframe/dataframe.ts
+++ /dev/null
@@ -1,179 +0,0 @@
-import { Vector } from "../vector/vector";
-import { StructVector, StructRow } from "../vector/struct";
-import { VirtualVector } from "../vector/virtual";
-
-import { Predicate } from "./predicate"
-
-export type NextFunc = (idx: number, cols: Vector[]) => void;
-
-export class DataFrameRow extends StructRow<any> {
-    constructor (batches: Vector[], idx: number) {
-        super(new StructVector({columns: batches}), idx);
-    }
-}
-
-export interface DataFrameOps {
-    readonly batches: Vector[][];
-    readonly lengths: Uint32Array;
-    filter(predicate: Predicate): DataFrameOps;
-    scan(next: NextFunc): void;
-    count(): number;
-}
-
-export class DataFrame extends Vector<DataFrameRow> implements DataFrameOps {
-    readonly lengths: Uint32Array;
-    constructor(readonly batches: Vector[][]) {
-        super();
-        // for each batch
-        this.lengths = new Uint32Array(batches.map((batch)=>{
-            // verify that every vector has the same length, and return that
-            // length
-            // throw an error if the lengths don't match
-            return batch.reduce((length, col) => {
-                if (col.length !== length)
-                    throw new Error("Attempted to create a DataFrame with un-aligned vectors");
-                return length;
-            }, batch[0].length);
-        }));
-    }
-
-    get(idx: number): DataFrameRow|null {
-        let batch = 0;
-        while (idx > this.lengths[batch] && batch < this.lengths.length)
-            idx -= this.lengths[batch++];
-
-        if (batch === this.lengths.length) return null;
-
-        else return new DataFrameRow(this.batches[batch], idx);
-    }
-
-    filter(predicate: Predicate): DataFrameOps {
-        return new FilteredDataFrame(this, predicate);
-    }
-
-    scan(next: NextFunc) {
-        for (let batch = -1; ++batch < this.lengths.length;) {
-            const length = this.lengths[batch];
-
-            // load batches
-            const columns = this.batches[batch];
-
-            // yield all indices
-            for (let idx = -1; ++idx < length;) {
-                next(idx, columns)
-            }
-        }
-    }
-
-    count(): number {
-        return this.lengths.reduce((acc, val) => acc + val);
-    }
-
-    *[Symbol.iterator]() {
-        for (let batch = -1; ++batch < this.lengths.length;) {
-            const length = this.lengths[batch];
-
-            // load batches
-            const columns = this.batches[batch];
-
-            // yield all indices
-            for (let idx = -1; ++idx < length;) {
-                yield new DataFrameRow(columns, idx);
-            }
-        }
-    }
-
-    static from(table: Vector<any>): DataFrame {
-        if (table instanceof StructVector) {
-            const columns = table.columns;
-            if (isAligned(columns)) {
-                // StructVector of aligned VirtualVectors
-                // break up VirtualVectors into batches
-                const batches = columns[0].vectors.map((_,i) => {
-                    return columns.map((vec: VirtualVector<any>) => {
-                            return vec.vectors[i];
-                        });
-                });
-                return new DataFrame(batches);
-            } else {
-                return new DataFrame([columns]);
-            }
-        } else if (table instanceof VirtualVector &&
-                   table.vectors.every((vec) => vec instanceof StructVector)) {
-            return new DataFrame(table.vectors.map((vec) => {
-                return (vec as StructVector<any>).columns;
-            }));
-        } else {
-            return new DataFrame([[table]]);
-        }
-    }
-}
-
-class FilteredDataFrame implements DataFrameOps {
-    readonly lengths: Uint32Array;
-    readonly batches: Vector[][];
-    constructor (readonly parent: DataFrameOps, private predicate: Predicate) {
-        this.batches = parent.batches;
-        this.lengths = parent.lengths;
-    }
-
-    scan(next: NextFunc) {
-        // inlined version of this:
-        // this.parent.scan((idx, columns) => {
-        //     if (this.predicate(idx, columns)) next(idx, columns);
-        // });
-        for (let batch = -1; ++batch < this.lengths.length;) {
-            const length = this.lengths[batch];
-
-            // load batches
-            const columns = this.batches[batch];
-            const predicate = this.predicate.bind(columns);
-
-            // yield all indices
-            for (let idx = -1; ++idx < length;) {
-                if (predicate(idx, columns)) next(idx, columns);
-            }
-        }
-    }
-
-    count(): number {
-        // inlined version of this:
-        // let sum = 0;
-        // this.parent.scan((idx, columns) => {
-        //     if (this.predicate(idx, columns)) ++sum;
-        // });
-        // return sum;
-        let sum = 0;
-        for (let batch = -1; ++batch < this.lengths.length;) {
-            const length = this.lengths[batch];
-
-            // load batches
-            const columns = this.batches[batch];
-            const predicate = this.predicate.bind(columns);
-
-            // yield all indices
-            for (let idx = -1; ++idx < length;) {
-                if (predicate(idx, columns)) ++sum;
-            }
-        }
-        return sum;
-    }
-
-    filter(predicate: Predicate): DataFrameOps {
-        return new FilteredDataFrame(
-            this.parent,
-            this.predicate.and(predicate)
-        );
-    }
-}
-
-function isAligned(columns: Vector[]): columns is VirtualVector<any>[] {
-    if (columns.every((col) => col instanceof VirtualVector)) {
-        const virtuals = columns as VirtualVector<any>[]
-
-        return virtuals.slice(1).every((col) => {
-            return col.aligned(virtuals[0]);
-        });
-    }
-    return false;
-}
diff --git a/js/src/dataframe/predicate.ts b/js/src/vector/predicate.ts
similarity index 100%
rename from js/src/dataframe/predicate.ts
rename to js/src/vector/predicate.ts
diff --git a/js/src/vector/table.ts b/js/src/vector/table.ts
index ca2b66a22da80..e81fe16a94ae8 100644
--- a/js/src/vector/table.ts
+++ b/js/src/vector/table.ts
@@ -18,44 +18,164 @@
 import { Vector } from './vector';
 import { StructVector, StructRow } from './struct';
 import { read, readAsync } from '../reader/arrow';
+import { Predicate } from './predicate';
 
-function concatVectors(tableVectors: Vector<any>[], batchVectors: Vector<any>[]) {
-    return tableVectors.length === 0 ? batchVectors : batchVectors.map((vec, i, _vs, col = tableVectors[i]) =>
-        vec && col && col.concat(vec) || col || vec
-    ) as Vector<any>[];
+export type NextFunc = (idx: number, cols: Vector[]) => void;
+
+export class DataFrameRow extends StructRow<any> {
+    constructor (batch: Vector[], idx: number) {
+        super(new StructVector({columns: batch}), idx);
+    }
+    toString() {
+        return this.toArray().map((x) => JSON.stringify(x)).join(', ');
+    }
 }
 
-export class Table<T> extends StructVector<T> {
+export interface DataFrame {
+    readonly batches: Vector[][];
+    readonly lengths: Uint32Array;
+    filter(predicate: Predicate): DataFrame;
+    scan(next: NextFunc): void;
+    count(): number;
+}
+
+function columnsFromBatches(batches: Vector[][]) {
+    const remaining = batches.slice(1);
+    return batches[0].map((vec, colidx) =>
+        vec.concat(...remaining.map((batch) => batch[colidx]))
+    );
+}
+
+export class Table extends StructVector<any> implements DataFrame {
     static from(sources?: Iterable<Uint8Array | Buffer | string> | object | string) {
-        let columns: Vector<any>[] = [];
+        let batches: Vector<any>[][] = [[]];
         if (sources) {
-            for (let vectors of read(sources)) {
-                columns = concatVectors(columns, vectors);
-            }
+            batches = Array.from(read(sources));
         }
-        return new Table({ columns });
+        return new Table({ batches });
     }
     static async fromAsync(sources?: AsyncIterable<Uint8Array | Buffer | string>) {
-        let columns: Vector<any>[] = [];
+        let batches: Vector<any>[][] = [[]];
         if (sources) {
-            for await (let vectors of readAsync(sources)) {
-                columns = columns = concatVectors(columns, vectors);
+            batches = [];
+            for await (let batch of readAsync(sources)) {
+                batches.push(batch);
             }
         }
-        return new Table({ columns });
+        return new Table({ batches });
     }
+
+    // VirtualVector of each column, spanning batches
+    readonly columns: Vector<any>[];
+
+    // List of batches, where each batch is a list of Vectors
+    readonly batches: Vector<any>[][];
+    readonly lengths: Uint32Array;
     readonly length: number;
-    constructor(argv: { columns: Vector<any>[] }) {
-        super(argv);
-        this.length = Math.max(...this.columns.map((col) => col.length)) | 0;
+    constructor(argv: { batches: Vector<any>[][] }) {
+        super({columns: columnsFromBatches(argv.batches)});
+        this.batches = argv.batches;
+        this.lengths = new Uint32Array(this.batches.map((batch) => batch[0].length));
+
+        this.length = this.lengths.reduce((acc, length) => acc + length);
+    }
+    get(idx: number): DataFrameRow {
+        let batch = 0;
+        while (idx > this.lengths[batch] && batch < this.lengths.length)
+            idx -= this.lengths[batch++];
+
+        if (batch === this.lengths.length) throw new Error("Overflow")
+
+        else return new DataFrameRow(this.batches[batch], idx);
+    }
+    filter(predicate: Predicate): DataFrame {
+        return new FilteredDataFrame(this, predicate);
+    }
+    scan(next: NextFunc) {
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
+
+            // load batches
+            const columns = this.batches[batch];
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                next(idx, columns)
+            }
+        }
     }
-    get(index: number): TableRow<T> {
-        return new TableRow(this, index);
+    count(): number {
+        return this.lengths.reduce((acc, val) => acc + val);
+    }
+    *[Symbol.iterator]() {
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
+
+            // load batches
+            const columns = this.batches[batch];
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                yield new DataFrameRow(columns, idx);
+            }
+        }
     }
 }
 
-export class TableRow<T> extends StructRow<T> {
-    toString() {
-        return this.toArray().map((x) => JSON.stringify(x)).join(', ');
+class FilteredDataFrame implements DataFrame {
+    readonly lengths: Uint32Array;
+    readonly batches: Vector[][];
+    constructor (readonly parent: DataFrame, private predicate: Predicate) {
+        this.batches = parent.batches;
+        this.lengths = parent.lengths;
+    }
+
+    scan(next: NextFunc) {
+        // inlined version of this:
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) next(idx, columns);
+        // });
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
+
+            // load batches
+            const columns = this.batches[batch];
+            const predicate = this.predicate.bind(columns);
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                if (predicate(idx, columns)) next(idx, columns);
+            }
+        }
+    }
+
+    count(): number {
+        // inlined version of this:
+        // let sum = 0;
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) ++sum;
+        // });
+        // return sum;
+        let sum = 0;
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
+
+            // load batches
+            const columns = this.batches[batch];
+            const predicate = this.predicate.bind(columns);
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                if (predicate(idx, columns)) ++sum;
+            }
+        }
+        return sum;
+    }
+
+    filter(predicate: Predicate): DataFrame {
+        return new FilteredDataFrame(
+            this.parent,
+            this.predicate.and(predicate)
+        );
     }
 }

From a9fff89040b0eba16fb0f0590854aeec0a2eee35 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Thu, 11 Jan 2018 16:50:43 -0500
Subject: [PATCH 11/19] Move Table out of the Vector hierarchy

---
 js/src/Arrow.ts                  | 10 +++++-----
 js/src/bin/arrow2csv.ts          |  4 ++--
 js/src/{vector => }/predicate.ts |  4 ++--
 js/src/{vector => }/table.ts     | 27 ++++++++++++++++-----------
 4 files changed, 25 insertions(+), 20 deletions(-)
 rename js/src/{vector => }/predicate.ts (98%)
 rename js/src/{vector => }/table.ts (89%)

diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index d80cfed4864f8..ed9ff577bb1e5 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -15,7 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Table } from './vector/table';
+import { Table, TableRow } from './table';
+import { lit, col } from './predicate';
 import { Vector } from './vector/vector';
 import { Utf8Vector } from './vector/utf8';
 import { DictionaryVector } from './vector/dictionary';
@@ -45,8 +46,6 @@ import {
     TimestampVector,
 } from './vector/numeric';
 
-import { lit, col } from './vector/predicate';
-
 // closure compiler always erases static method names:
 // https://github.com/google/closure-compiler/issues/1776
 // set them via string indexers to save them from the mangler
@@ -55,7 +54,9 @@ Table['fromAsync'] = Table.fromAsync;
 BoolVector['pack'] = BoolVector.pack;
 
 export { read, readAsync };
-export { Table, Vector, StructRow };
+export { Table, TableRow };
+export { lit, col };
+export { Vector, StructRow };
 export { Uint64, Int64, Int128 };
 export { NumericVectorConstructor } from './vector/numeric';
 export { List, TypedArray, TypedArrayConstructor } from './vector/types';
@@ -86,7 +87,6 @@ export {
     FixedSizeListVector,
 };
 
-export { lit, col } from './vector/predicate';
 
 
 /* These exports are needed for the closure umd targets */
diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts
index 117d417f1b4fa..2bc1600a8408d 100644
--- a/js/src/bin/arrow2csv.ts
+++ b/js/src/bin/arrow2csv.ts
@@ -98,7 +98,7 @@ files.forEach((source) => {
 });
 
 function printTable(table: Arrow.Table) {
-    let header = [...table.columns.map((_, i) => table.key(i))].map(stringify);
+    let header = [...table.columns.map((c) => c.name)].map(stringify);
     let maxColumnWidths = header.map(x => x.length);
     // Pass one to convert to strings and count max column widths
     for (let i = -1, n = table.length - 1; ++i < n;) {
@@ -132,4 +132,4 @@ function stringify(x: any) {
                                       : `${x}`;
 }
 
-})();
\ No newline at end of file
+})();
diff --git a/js/src/vector/predicate.ts b/js/src/predicate.ts
similarity index 98%
rename from js/src/vector/predicate.ts
rename to js/src/predicate.ts
index 263b8646d71fc..c2be4db75750b 100644
--- a/js/src/vector/predicate.ts
+++ b/js/src/predicate.ts
@@ -1,5 +1,5 @@
-import { Vector } from "../vector/vector";
-import { DictionaryVector } from "../vector/dictionary";
+import { Vector } from "./vector/vector";
+import { DictionaryVector } from "./vector/dictionary";
 
 export type ValueFunc<T> = (idx: number, cols: Vector[]) => T|null;
 export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
diff --git a/js/src/vector/table.ts b/js/src/table.ts
similarity index 89%
rename from js/src/vector/table.ts
rename to js/src/table.ts
index e81fe16a94ae8..4ab34192376f8 100644
--- a/js/src/vector/table.ts
+++ b/js/src/table.ts
@@ -15,20 +15,25 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Vector } from './vector';
-import { StructVector, StructRow } from './struct';
-import { read, readAsync } from '../reader/arrow';
+import { Vector } from './vector/vector';
+import { read, readAsync } from './reader/arrow';
 import { Predicate } from './predicate';
 
 export type NextFunc = (idx: number, cols: Vector[]) => void;
 
-export class DataFrameRow extends StructRow<any> {
-    constructor (batch: Vector[], idx: number) {
-        super(new StructVector({columns: batch}), idx);
+export class TableRow {
+    constructor (readonly batch: Vector[], readonly idx: number) {}
+    toArray() {
+        return this.batch.map((vec) => vec.get(this.idx));
     }
     toString() {
         return this.toArray().map((x) => JSON.stringify(x)).join(', ');
     }
+    *[Symbol.iterator]() {
+        for (const vec of this.batch) {
+            yield vec.get(this.idx);
+        }
+    }
 }
 
 export interface DataFrame {
@@ -46,7 +51,7 @@ function columnsFromBatches(batches: Vector[][]) {
     );
 }
 
-export class Table extends StructVector<any> implements DataFrame {
+export class Table implements DataFrame {
     static from(sources?: Iterable<Uint8Array | Buffer | string> | object | string) {
         let batches: Vector<any>[][] = [[]];
         if (sources) {
@@ -73,20 +78,20 @@ export class Table extends StructVector<any> implements DataFrame {
     readonly lengths: Uint32Array;
     readonly length: number;
     constructor(argv: { batches: Vector<any>[][] }) {
-        super({columns: columnsFromBatches(argv.batches)});
         this.batches = argv.batches;
+        this.columns = columnsFromBatches(this.batches);
         this.lengths = new Uint32Array(this.batches.map((batch) => batch[0].length));
 
         this.length = this.lengths.reduce((acc, length) => acc + length);
     }
-    get(idx: number): DataFrameRow {
+    get(idx: number): TableRow {
         let batch = 0;
         while (idx > this.lengths[batch] && batch < this.lengths.length)
             idx -= this.lengths[batch++];
 
         if (batch === this.lengths.length) throw new Error("Overflow")
 
-        else return new DataFrameRow(this.batches[batch], idx);
+        else return new TableRow(this.batches[batch], idx);
     }
     filter(predicate: Predicate): DataFrame {
         return new FilteredDataFrame(this, predicate);
@@ -116,7 +121,7 @@ export class Table extends StructVector<any> implements DataFrame {
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                yield new DataFrameRow(columns, idx);
+                yield new TableRow(columns, idx);
             }
         }
     }

From a788db315cf6410bd64fc079325696c0324b45bc Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Thu, 11 Jan 2018 16:55:50 -0500
Subject: [PATCH 12/19] Cleanup

---
 js/perf/index.js    | 8 ++++----
 js/src/predicate.ts | 9 ---------
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/js/perf/index.js b/js/perf/index.js
index 9527a8e842c5a..0be4db3084dbf 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -16,10 +16,10 @@
 // under the License.
 
 // Use the ES5 UMD target as perf baseline
-const { col, DataFrame, Table, readVectors } = require('../targets/es5/umd');
-// const { col, DataFrame, Table, readVectors } = require('../targets/es5/cjs');
-// const { col, DataFrame, Table, readVectors } = require('../targets/es2015/umd');
-// const { col, DataFrame, Table, readVectors } = require('../targets/es2015/cjs');
+// const { col, Table, readVectors } = require('../targets/es5/umd');
+// const { col, Table, readVectors } = require('../targets/es5/cjs');
+// const { col, Table, readVectors } = require('../targets/es2015/umd');
+const { col, Table, readVectors } = require('../targets/es2015/cjs');
 
 const config = require('./config');
 const Benchmark = require('benchmark');
diff --git a/js/src/predicate.ts b/js/src/predicate.ts
index c2be4db75750b..dbfc7479ffbb0 100644
--- a/js/src/predicate.ts
+++ b/js/src/predicate.ts
@@ -180,15 +180,6 @@ class GTeq extends ComparisonPredicate {
         const col_func = col.bind(cols);
         return (idx: number, cols: Vector[]) => col_func(idx, cols) >= lit.v;
     }
-    //eval(idx: number, cols: Vector[]) {
-    //    return this.left.eval(idx, cols) >= this.right.eval(idx, cols);
-    //}
-    //emitString() {
-    //    return `${this.left.emitString()} >= ${this.right.emitString()}`
-    //}
-    //createDictionaryEval(schema, lit: Literal, col: Col): (idx: number, cols: Vector[]) => boolean {
-    //    return this.eval;
-    //}
 }
 
 export function lit(n: number): Value<any> { return new Literal(n); }

From 2e118aba789afb40d6b596625e89cd008773999f Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Thu, 11 Jan 2018 17:02:44 -0500
Subject: [PATCH 13/19] linter

---
 js/src/Arrow.ts     |  2 --
 js/src/predicate.ts | 26 +++++++++++++-------------
 js/src/table.ts     | 13 +++++++------
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index ed9ff577bb1e5..926ee88720bf0 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -87,8 +87,6 @@ export {
     FixedSizeListVector,
 };
 
-
-
 /* These exports are needed for the closure umd targets */
 try {
     const Arrow = eval('exports');
diff --git a/js/src/predicate.ts b/js/src/predicate.ts
index dbfc7479ffbb0..2b0be44d472dc 100644
--- a/js/src/predicate.ts
+++ b/js/src/predicate.ts
@@ -1,29 +1,29 @@
-import { Vector } from "./vector/vector";
-import { DictionaryVector } from "./vector/dictionary";
+import { Vector } from './vector/vector';
+import { DictionaryVector } from './vector/dictionary';
 
 export type ValueFunc<T> = (idx: number, cols: Vector[]) => T|null;
 export type PredicateFunc = (idx: number, cols: Vector[]) => boolean;
 
 export abstract class Value<T> {
     eq(other: Value<T>|T): Predicate {
-        if (!(other instanceof Value)) other = new Literal(other);
+        if (!(other instanceof Value)) { other = new Literal(other); }
         return new Equals(this, other);
     }
     lteq(other: Value<T>|T): Predicate {
-        if (!(other instanceof Value)) other = new Literal(other);
+        if (!(other instanceof Value)) { other = new Literal(other); }
         return new LTeq(this, other);
     }
     gteq(other: Value<T>|T): Predicate {
-        if (!(other instanceof Value)) other = new Literal(other);
+        if (!(other instanceof Value)) { other = new Literal(other); }
         return new GTeq(this, other);
     }
 }
 
-class Literal<T=any> extends Value<T> {
+class Literal<T= any> extends Value<T> {
     constructor(public v: T) { super(); }
 }
 
-class Col<T=any> extends Value<T> {
+class Col<T= any> extends Value<T> {
     vector: Vector<T>;
     colidx: number;
 
@@ -39,9 +39,9 @@ class Col<T=any> extends Value<T> {
                     break;
                 }
             }
-            if (this.colidx < 0) throw new Error(`Failed to bind Col "${this.name}"`)
+            if (this.colidx < 0) { throw new Error(`Failed to bind Col "${this.name}"`); }
         }
-        this.vector = cols[this.colidx]
+        this.vector = cols[this.colidx];
         return this.vector.get.bind(this.vector);
     }
 
@@ -55,7 +55,7 @@ export abstract class Predicate {
     ands(): Predicate[] { return [this]; }
 }
 
-abstract class ComparisonPredicate<T=any> extends Predicate {
+abstract class ComparisonPredicate<T= any> extends Predicate {
     constructor(public readonly left: Value<T>, public readonly right: Value<T>) {
         super();
     }
@@ -94,7 +94,7 @@ class And extends CombinationPredicate {
         const right = this.right.bind(cols);
         return (idx: number, cols: Vector[]) => left(idx, cols) && right(idx, cols);
     }
-    ands() : Predicate[] { return this.left.ands().concat(this.right.ands()); }
+    ands(): Predicate[] { return this.left.ands().concat(this.right.ands()); }
 }
 
 class Or extends CombinationPredicate {
@@ -121,7 +121,7 @@ class Equals extends ComparisonPredicate {
         const col_func = col.bind(cols);
         if (col.vector instanceof DictionaryVector) {
             // Assume that there is only one key with the value `lit.v`
-            let key = -1
+            let key = -1;
             for (; ++key < col.vector.data.length;) {
                 if (col.vector.data.get(key) === lit.v) {
                     break;
@@ -138,7 +138,7 @@ class Equals extends ComparisonPredicate {
             } else {
                 return (idx: number) => {
                     return (col.vector as DictionaryVector<any>).getKey(idx) === key;
-                }
+                };
             }
         } else {
             return (idx: number, cols: Vector[]) => col_func(idx, cols) == lit.v;
diff --git a/js/src/table.ts b/js/src/table.ts
index 4ab34192376f8..613699f0b66b1 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -86,12 +86,13 @@ export class Table implements DataFrame {
     }
     get(idx: number): TableRow {
         let batch = 0;
-        while (idx > this.lengths[batch] && batch < this.lengths.length)
+        while (idx > this.lengths[batch] && batch < this.lengths.length) {
             idx -= this.lengths[batch++];
+        }
 
-        if (batch === this.lengths.length) throw new Error("Overflow")
+        if (batch === this.lengths.length) { throw new Error('Overflow'); }
 
-        else return new TableRow(this.batches[batch], idx);
+        return new TableRow(this.batches[batch], idx);
     }
     filter(predicate: Predicate): DataFrame {
         return new FilteredDataFrame(this, predicate);
@@ -105,7 +106,7 @@ export class Table implements DataFrame {
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                next(idx, columns)
+                next(idx, columns);
             }
         }
     }
@@ -149,7 +150,7 @@ class FilteredDataFrame implements DataFrame {
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                if (predicate(idx, columns)) next(idx, columns);
+                if (predicate(idx, columns)) { next(idx, columns); }
             }
         }
     }
@@ -171,7 +172,7 @@ class FilteredDataFrame implements DataFrame {
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                if (predicate(idx, columns)) ++sum;
+                if (predicate(idx, columns)) { ++sum; }
             }
         }
         return sum;

From 2f4a3491e0a8593041e828964095cac77a31d0e8 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Fri, 12 Jan 2018 13:11:20 -0500
Subject: [PATCH 14/19] Minor tweaks

---
 js/src/predicate.ts | 17 ++++++++++-------
 js/src/table.ts     | 21 +++++++--------------
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/js/src/predicate.ts b/js/src/predicate.ts
index 2b0be44d472dc..1fedc98d1c41e 100644
--- a/js/src/predicate.ts
+++ b/js/src/predicate.ts
@@ -19,11 +19,11 @@ export abstract class Value<T> {
     }
 }
 
-class Literal<T= any> extends Value<T> {
+export class Literal<T= any> extends Value<T> {
     constructor(public v: T) { super(); }
 }
 
-class Col<T= any> extends Value<T> {
+export class Col<T= any> extends Value<T> {
     vector: Vector<T>;
     colidx: number;
 
@@ -55,7 +55,7 @@ export abstract class Predicate {
     ands(): Predicate[] { return [this]; }
 }
 
-abstract class ComparisonPredicate<T= any> extends Predicate {
+export abstract class ComparisonPredicate<T= any> extends Predicate {
     constructor(public readonly left: Value<T>, public readonly right: Value<T>) {
         super();
     }
@@ -105,7 +105,7 @@ class Or extends CombinationPredicate {
     }
 }
 
-class Equals extends ComparisonPredicate {
+export class Equals extends ComparisonPredicate {
     protected _bindLitLit(_: Vector<any>[], left: Literal, right: Literal): PredicateFunc {
         const rtrn: boolean = left.v == right.v;
         return () => rtrn;
@@ -121,6 +121,9 @@ class Equals extends ComparisonPredicate {
         const col_func = col.bind(cols);
         if (col.vector instanceof DictionaryVector) {
             // Assume that there is only one key with the value `lit.v`
+            // TODO: add lazily-computed reverse dictionary lookups, associated
+            // with col.vector.data so that we only have to do this once per
+            // dictionary
             let key = -1;
             for (; ++key < col.vector.data.length;) {
                 if (col.vector.data.get(key) === lit.v) {
@@ -146,7 +149,7 @@ class Equals extends ComparisonPredicate {
     }
 }
 
-class LTeq extends ComparisonPredicate {
+export class LTeq extends ComparisonPredicate {
     protected _bindLitLit(_: Vector<any>[], left: Literal, right: Literal): PredicateFunc {
         const rtrn: boolean = left.v <= right.v;
         return () => rtrn;
@@ -164,7 +167,7 @@ class LTeq extends ComparisonPredicate {
     }
 }
 
-class GTeq extends ComparisonPredicate {
+export class GTeq extends ComparisonPredicate {
     protected _bindLitLit(_: Vector<any>[], left: Literal, right: Literal): PredicateFunc {
         const rtrn: boolean = left.v >= right.v;
         return () => rtrn;
@@ -183,4 +186,4 @@ class GTeq extends ComparisonPredicate {
 }
 
 export function lit(n: number): Value<any> { return new Literal(n); }
-export function col(n: string): Value<any> { return new Col(n); }
+export function col(n: string): Col<any> { return new Col(n); }
diff --git a/js/src/table.ts b/js/src/table.ts
index 613699f0b66b1..620a4a701c80f 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -37,8 +37,6 @@ export class TableRow {
 }
 
 export interface DataFrame {
-    readonly batches: Vector[][];
-    readonly lengths: Uint32Array;
     filter(predicate: Predicate): DataFrame;
     scan(next: NextFunc): void;
     count(): number;
@@ -129,23 +127,18 @@ export class Table implements DataFrame {
 }
 
 class FilteredDataFrame implements DataFrame {
-    readonly lengths: Uint32Array;
-    readonly batches: Vector[][];
-    constructor (readonly parent: DataFrame, private predicate: Predicate) {
-        this.batches = parent.batches;
-        this.lengths = parent.lengths;
-    }
+    constructor (readonly parent: Table, private predicate: Predicate) {}
 
     scan(next: NextFunc) {
         // inlined version of this:
         // this.parent.scan((idx, columns) => {
         //     if (this.predicate(idx, columns)) next(idx, columns);
         // });
-        for (let batch = -1; ++batch < this.lengths.length;) {
-            const length = this.lengths[batch];
+        for (let batch = -1; ++batch < this.parent.lengths.length;) {
+            const length = this.parent.lengths[batch];
 
             // load batches
-            const columns = this.batches[batch];
+            const columns = this.parent.batches[batch];
             const predicate = this.predicate.bind(columns);
 
             // yield all indices
@@ -163,11 +156,11 @@ class FilteredDataFrame implements DataFrame {
         // });
         // return sum;
         let sum = 0;
-        for (let batch = -1; ++batch < this.lengths.length;) {
-            const length = this.lengths[batch];
+        for (let batch = -1; ++batch < this.parent.lengths.length;) {
+            const length = this.parent.lengths[batch];
 
             // load batches
-            const columns = this.batches[batch];
+            const columns = this.parent.batches[batch];
             const predicate = this.predicate.bind(columns);
 
             // yield all indices

From 671914776f3454748c39cb4ee97714ead788e0bd Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Mon, 15 Jan 2018 12:39:33 -0500
Subject: [PATCH 15/19] Add DataFrame.countBy operation

---
 js/perf/index.js        | 26 ++++++++++++---
 js/perf/table_config.js | 10 ++++--
 js/src/table.ts         | 73 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 101 insertions(+), 8 deletions(-)

diff --git a/js/perf/index.js b/js/perf/index.js
index 0be4db3084dbf..d31b6430ec871 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -41,17 +41,23 @@ for (let { name, buffers} of config) {
     suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
 }
 
-for (let {name, buffers, tests} of require('./table_config')) {
+for (let {name, buffers, countBys, counts} of require('./table_config')) {
+    const table = Table.from(buffers);
+
+    const dfCountBySuite = new Benchmark.Suite(`DataFrame Count By "${name}"`, { async: true });
+    for (countBy of countBys) {
+        dfCountBySuite.add(createDataFrameCountByTest(table, countBy));
+    }
+
     const dfFilterCountSuite = new Benchmark.Suite(`DataFrame Filter-Scan Count "${name}"`, { async: true });
     const dfDirectCountSuite = new Benchmark.Suite(`DataFrame Direct Count "${name}"`, { async: true });
-    const table = Table.from(buffers);
 
-    for (test of tests) {
+    for (test of counts) {
         dfFilterCountSuite.add(createDataFrameFilterCountTest(table, test.col, test.test, test.value))
         dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value))
     }
 
-    suites.push(dfFilterCountSuite, dfDirectCountSuite)
+    suites.push(dfCountBySuite, dfFilterCountSuite, dfDirectCountSuite)
 }
 
 console.log('Running apache-arrow performance tests...\n');
@@ -167,6 +173,18 @@ function createDataFrameDirectCountTest(table, column, test, value) {
     };
 }
 
+function createDataFrameCountByTest(table, column) {
+    let colidx = table.columns.findIndex((c)=>c.name === column);
+
+    return {
+        async: true,
+        name: `name: '${column}', length: ${table.length}, type: ${table.columns[colidx].type}`,
+        fn() {
+            table.countBy(col(column));
+        }
+    };
+}
+
 function createDataFrameFilterCountTest(table, column, test, value) {
     let colidx = table.columns.findIndex((c)=>c.name === column);
     let df;
diff --git a/js/perf/table_config.js b/js/perf/table_config.js
index 3c045e4571e44..e3c332c870f38 100644
--- a/js/perf/table_config.js
+++ b/js/perf/table_config.js
@@ -22,7 +22,10 @@ const glob = require('glob');
 const config = [];
 const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
 
-tests = {
+countBys = {
+    "tracks": ['origin', 'destination']
+}
+counts = {
     "tracks": [
         {col: 'lat',    test: 'gteq', value: 0        },
         {col: 'lng',    test: 'gteq', value: 0        },
@@ -32,11 +35,12 @@ tests = {
 
 for (const filename of filenames) {
     const { name } = path.parse(filename);
-    if (name in tests) {
+    if (name in counts) {
         config.push({
             name,
             buffers: [fs.readFileSync(filename)],
-            tests: tests[name]
+            countBys: countBys[name],
+            counts: counts[name],
         });
     }
 }
diff --git a/js/src/table.ts b/js/src/table.ts
index 620a4a701c80f..6f312746f2c71 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -16,8 +16,10 @@
 // under the License.
 
 import { Vector } from './vector/vector';
+import { DictionaryVector } from './vector/dictionary';
+import { Uint32Vector } from './vector/numeric';
 import { read, readAsync } from './reader/arrow';
-import { Predicate } from './predicate';
+import { Col, Predicate } from './predicate';
 
 export type NextFunc = (idx: number, cols: Vector[]) => void;
 
@@ -40,6 +42,7 @@ export interface DataFrame {
     filter(predicate: Predicate): DataFrame;
     scan(next: NextFunc): void;
     count(): number;
+    countBy(col: (Col|string)): Table;
 }
 
 function columnsFromBatches(batches: Vector[][]) {
@@ -111,6 +114,40 @@ export class Table implements DataFrame {
     count(): number {
         return this.lengths.reduce((acc, val) => acc + val);
     }
+    countBy(count_by: (Col|string)): Table {
+        if (count_by instanceof String) {
+            count_by = new Col(count_by);
+        }
+
+        // the last batch will have the most complete dictionary, use it's data
+        // vector as our count by keys
+        count_by.bind(this.batches[this.batches.length - 1]);
+        if (!(count_by.vector instanceof DictionaryVector)) {
+            throw new Error("countBy currently only supports dictionary-encoded columns");
+        }
+
+        let keys: Vector = (count_by.vector as DictionaryVector<any>).data;
+        // TODO: Adjust array byte width based on overall length
+        // (e.g. if this.length <= 255 use Uint8Array, etc...)
+        let counts: Uint32Array = new Uint32Array(keys.length);
+
+
+        for (let batch = -1; ++batch < this.lengths.length;) {
+            const length = this.lengths[batch];
+
+            // load batches
+            const columns = this.batches[batch];
+            count_by.bind(columns);
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                let key = (count_by.vector as DictionaryVector<any>).getKey(idx)
+                if (key !== null) { counts[key]++; }
+            }
+        }
+
+        return new Table({batches: [[keys, new Uint32Vector({data: counts})]]})
+    }
     *[Symbol.iterator]() {
         for (let batch = -1; ++batch < this.lengths.length;) {
             const length = this.lengths[batch];
@@ -177,4 +214,38 @@ class FilteredDataFrame implements DataFrame {
             this.predicate.and(predicate)
         );
     }
+
+    countBy(count_by: (Col|string)): Table {
+        if (count_by instanceof String) {
+            count_by = new Col(count_by);
+        }
+
+        // the last batch will have the most complete dictionary, use it's data
+        // vector as our count by keys
+        count_by.bind(this.parent.batches[this.parent.batches.length - 1]);
+        if (!(count_by.vector instanceof DictionaryVector)) {
+            throw new Error("countBy currently only supports dictionary-encoded columns");
+        }
+
+        let keys: Vector = (count_by.vector as DictionaryVector<any>).data;
+        let counts: Uint32Array = new Uint32Array(keys.length);
+
+
+        for (let batch = -1; ++batch < this.parent.lengths.length;) {
+            const length = this.parent.lengths[batch];
+
+            // load batches
+            const columns = this.parent.batches[batch];
+            const predicate = this.predicate.bind(columns);
+            count_by.bind(columns);
+
+            // yield all indices
+            for (let idx = -1; ++idx < length;) {
+                let key = (count_by.vector as DictionaryVector<any>).getKey(idx)
+                if (key !== null && predicate(idx, columns)) { counts[key]++; }
+            }
+        }
+
+        return new Table({batches: [[keys, new Uint32Vector({data: counts})]]})
+    }
 }

From 724488702a6aa8abb16d3a9f6ffb8b82ccebbe11 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Mon, 15 Jan 2018 17:08:21 -0500
Subject: [PATCH 16/19] Add table unit tests...

.. also found and resolved some minor bugs (the batch length check in get(idx)
should be >= rather than >, various extern issues with UMD builds)
---
 js/src/Arrow.externs.ts     |  10 +
 js/src/Arrow.ts             |  13 +-
 js/src/table.ts             |  28 ++-
 js/test/unit/table-tests.ts | 371 ++++++++++++++++++++++++++++++++++++
 4 files changed, 411 insertions(+), 11 deletions(-)
 create mode 100644 js/test/unit/table-tests.ts

diff --git a/js/src/Arrow.externs.ts b/js/src/Arrow.externs.ts
index 0685d262cc186..abc11eff509d9 100644
--- a/js/src/Arrow.externs.ts
+++ b/js/src/Arrow.externs.ts
@@ -54,6 +54,16 @@ Table.prototype.toString;
 Table.prototype.lengths;
 /** @type {?} */
 Table.prototype.batches;
+/** @type {?} */
+Table.prototype.countBy;
+/** @type {?} */
+Table.prototype.scan;
+/** @type {?} */
+Table.prototype.get;
+
+let CountByResult = function() {};
+/** @type {?} */
+CountByResult.prototype.asJSON;
 
 let Vector = function() {};
 /** @type {?} */
diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts
index 926ee88720bf0..21eb2976d44a4 100644
--- a/js/src/Arrow.ts
+++ b/js/src/Arrow.ts
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-import { Table, TableRow } from './table';
-import { lit, col } from './predicate';
+import { Table, TableRow, CountByResult } from './table';
+import { lit, col, Col, Value } from './predicate';
 import { Vector } from './vector/vector';
 import { Utf8Vector } from './vector/utf8';
 import { DictionaryVector } from './vector/dictionary';
@@ -54,8 +54,8 @@ Table['fromAsync'] = Table.fromAsync;
 BoolVector['pack'] = BoolVector.pack;
 
 export { read, readAsync };
-export { Table, TableRow };
-export { lit, col };
+export { Table, TableRow, CountByResult };
+export { lit, col, Col, Value };
 export { Vector, StructRow };
 export { Uint64, Int64, Int128 };
 export { NumericVectorConstructor } from './vector/numeric';
@@ -94,9 +94,11 @@ try {
         // string indexers tell closure compiler not to rename these properties
         Arrow['lit'] = lit;
         Arrow['col'] = col;
+        Arrow['Col'] = Col;
         Arrow['read'] = read;
-        Arrow['readAsync'] = readAsync;
+        Arrow['Value'] = Value;
         Arrow['Table'] = Table;
+        Arrow['readAsync'] = readAsync;
         Arrow['Vector'] = Vector;
         Arrow['StructRow'] = StructRow;
         Arrow['BoolVector'] = BoolVector;
@@ -120,6 +122,7 @@ try {
         Arrow['Float32Vector'] = Float32Vector;
         Arrow['Float64Vector'] = Float64Vector;
         Arrow['DecimalVector'] = DecimalVector;
+        Arrow['CountByResult'] = CountByResult;
         Arrow['TimestampVector'] = TimestampVector;
         Arrow['DictionaryVector'] = DictionaryVector;
         Arrow['FixedSizeListVector'] = FixedSizeListVector;
diff --git a/js/src/table.ts b/js/src/table.ts
index 6f312746f2c71..f00b5ef9da1df 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -42,7 +42,7 @@ export interface DataFrame {
     filter(predicate: Predicate): DataFrame;
     scan(next: NextFunc): void;
     count(): number;
-    countBy(col: (Col|string)): Table;
+    countBy(col: (Col|string)): CountByResult;
 }
 
 function columnsFromBatches(batches: Vector[][]) {
@@ -87,7 +87,7 @@ export class Table implements DataFrame {
     }
     get(idx: number): TableRow {
         let batch = 0;
-        while (idx > this.lengths[batch] && batch < this.lengths.length) {
+        while (idx >= this.lengths[batch] && batch < this.lengths.length) {
             idx -= this.lengths[batch++];
         }
 
@@ -114,7 +114,7 @@ export class Table implements DataFrame {
     count(): number {
         return this.lengths.reduce((acc, val) => acc + val);
     }
-    countBy(count_by: (Col|string)): Table {
+    countBy(count_by: (Col|string)): CountByResult {
         if (count_by instanceof String) {
             count_by = new Col(count_by);
         }
@@ -146,7 +146,7 @@ export class Table implements DataFrame {
             }
         }
 
-        return new Table({batches: [[keys, new Uint32Vector({data: counts})]]})
+        return new CountByResult(keys, new Uint32Vector({data: counts}))
     }
     *[Symbol.iterator]() {
         for (let batch = -1; ++batch < this.lengths.length;) {
@@ -215,7 +215,7 @@ class FilteredDataFrame implements DataFrame {
         );
     }
 
-    countBy(count_by: (Col|string)): Table {
+    countBy(count_by: (Col|string)): CountByResult {
         if (count_by instanceof String) {
             count_by = new Col(count_by);
         }
@@ -246,6 +246,22 @@ class FilteredDataFrame implements DataFrame {
             }
         }
 
-        return new Table({batches: [[keys, new Uint32Vector({data: counts})]]})
+        return new CountByResult(keys, new Uint32Vector({data: counts}))
+    }
+}
+
+export class CountByResult extends Table implements DataFrame {
+    constructor(readonly keys: Vector, readonly counts: Vector<number|null>) {
+        super({batches: [[keys, counts]]});
+    }
+
+    asJSON(): Object {
+        let result: {[key: string]: number|null} = {};
+
+        for (let i = -1; ++i < this.length;) {
+            result[this.keys.get(i)] = this.counts.get(i);
+        }
+
+        return result;
     }
 }
diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts
new file mode 100644
index 0000000000000..33fb2d178b0d2
--- /dev/null
+++ b/js/test/unit/table-tests.ts
@@ -0,0 +1,371 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Arrow, {
+} from '../Arrow';
+
+const {
+    col,
+    Table,
+} = Arrow;
+
+describe(`Table`, () => {
+    describe(`single record batch`, () => {
+        const table = Table.from({
+          "schema": {
+            "fields": [
+              {
+                "name": "f32",
+                "type": {
+                  "name": "floatingpoint",
+                  "precision": "SINGLE"
+                },
+                "nullable": false,
+                "children": [],
+              },
+              {
+                "name": "i32",
+                "type": {
+                  "name": "int",
+                  "isSigned": true,
+                  "bitWidth": 32
+                },
+                "nullable": false,
+                "children": [],
+              },
+              {
+                "name": "dictionary",
+                "type": {
+                  "name": "utf8"
+                },
+                "nullable": false,
+                "children": [],
+                "dictionary": {
+                  "id": 0,
+                  "indexType": {
+                    "name": "int",
+                    "isSigned": true,
+                    "bitWidth": 8
+                  },
+                  "isOrdered": false
+                }
+              }
+            ]
+          },
+          "dictionaries": [{
+            "id": 0,
+            "data": {
+              "count": 3,
+              "columns": [
+                {
+                  "name": "DICT0",
+                  "count": 3,
+                  "VALIDITY": [],
+                  "OFFSET": [
+                    0,
+                    1,
+                    2,
+                    3
+                  ],
+                  "DATA": [
+                    "a",
+                    "b",
+                    "c",
+                  ]
+                }
+              ]
+            }
+          }],
+          "batches": [{
+            "count": 7,
+            "columns": [
+              {
+                "name": "f32",
+                "count": 7,
+                "VALIDITY": [],
+                "DATA": [-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3]
+              },
+              {
+                "name": "i32",
+                "count": 7,
+                "VALIDITY": [],
+                "DATA": [-1, 1, -1, 1, -1, 1, -1]
+              },
+              {
+                "name": "dictionary",
+                "count": 7,
+                "VALIDITY": [],
+                "DATA": [0, 1, 2, 0, 1, 2, 0]
+              }
+            ]
+          }]
+        });
+
+        // Wrap floating point values in a Float32Array and take them back out to
+        // make sure that equality checks will pass
+        const values = [
+            [new Float32Array([-0.3])[0], -1, 'a'],
+            [new Float32Array([-0.2])[0],  1, 'b'],
+            [new Float32Array([-0.1])[0], -1, 'c'],
+            [new Float32Array([ 0  ])[0],  1, 'a'],
+            [new Float32Array([ 0.1])[0], -1, 'b'],
+            [new Float32Array([ 0.2])[0],  1, 'c'],
+            [new Float32Array([ 0.3])[0], -1, 'a']
+        ]
+        test(`has the correct length`, () => {
+            expect(table.length).toEqual(values.length);
+        });
+        test(`gets expected values`, () => {
+            for (let i = -1; ++i < values.length;) {
+                expect(table.get(i).toArray()).toEqual(values[i]);
+            }
+        });
+        test(`iterates expected values`, () => {
+            let i = 0;
+            for (let row of table) {
+                expect(row.toArray()).toEqual(values[i++]);
+            }
+        });
+        test(`scans expected values`, () => {
+            let expected_idx = 0;
+            table.scan((idx, cols) => {
+                expect(cols.map((c)=>c.get(idx))).toEqual(values[expected_idx++]);
+            });
+        });
+        test(`count() returns the correct length`, () => {
+            expect(table.count()).toEqual(values.length);
+        });
+        test(`filter on f32 >= 0 returns the correct length`, () => {
+            expect(table.filter(col('f32').gteq(0)).count()).toEqual(4);
+        });
+        test(`filter on i32 <= 0 returns the correct length`, () => {
+            expect(table.filter(col('i32').lteq(0)).count()).toEqual(4);
+        });
+        test(`filter on dictionary == 'a' returns the correct length`, () => {
+            expect(table.filter(col('dictionary').eq('a')).count()).toEqual(3);
+        });
+        test(`countBy on dictionary returns the correct counts`, () => {
+            expect(table.countBy(col('dictionary')).asJSON()).toEqual({
+                'a': 3,
+                'b': 2,
+                'c': 2,
+            });
+        });
+        test(`countBy on dictionary with filter returns the correct counts`, () => {
+            expect(table.filter(col('i32').eq(1)).countBy(col('dictionary')).asJSON()).toEqual({
+                'a': 1,
+                'b': 1,
+                'c': 1,
+            });
+        });
+    });
+    describe(`multiple record batches`, () => {
+        const table = Table.from({
+          "schema": {
+            "fields": [
+              {
+                "name": "f32",
+                "type": {
+                  "name": "floatingpoint",
+                  "precision": "SINGLE"
+                },
+                "nullable": false,
+                "children": [],
+              },
+              {
+                "name": "i32",
+                "type": {
+                  "name": "int",
+                  "isSigned": true,
+                  "bitWidth": 32
+                },
+                "nullable": false,
+                "children": [],
+              },
+              {
+                "name": "dictionary",
+                "type": {
+                  "name": "utf8"
+                },
+                "nullable": false,
+                "children": [],
+                "dictionary": {
+                  "id": 0,
+                  "indexType": {
+                    "name": "int",
+                    "isSigned": true,
+                    "bitWidth": 8
+                  },
+                  "isOrdered": false
+                }
+              }
+            ]
+          },
+          "dictionaries": [{
+            "id": 0,
+            "data": {
+              "count": 3,
+              "columns": [
+                {
+                  "name": "DICT0",
+                  "count": 3,
+                  "VALIDITY": [],
+                  "OFFSET": [
+                    0,
+                    1,
+                    2,
+                    3
+                  ],
+                  "DATA": [
+                    "a",
+                    "b",
+                    "c",
+                  ]
+                }
+              ]
+            }
+          }],
+          "batches": [{
+            "count": 3,
+            "columns": [
+              {
+                "name": "f32",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [-0.3, -0.2, -0.1]
+              },
+              {
+                "name": "i32",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [-1, 1, -1]
+              },
+              {
+                "name": "dictionary",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [0, 1, 2]
+              }
+            ]
+          }, {
+            "count": 3,
+            "columns": [
+              {
+                "name": "f32",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [0, 0.1, 0.2]
+              },
+              {
+                "name": "i32",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [1, -1, 1]
+              },
+              {
+                "name": "dictionary",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [0, 1, 2]
+              }
+            ]
+          }, {
+            "count": 3,
+            "columns": [
+              {
+                "name": "f32",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [0.3, 0.2, 0.1]
+              },
+              {
+                "name": "i32",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [-1, 1, -1]
+              },
+              {
+                "name": "dictionary",
+                "count": 3,
+                "VALIDITY": [],
+                "DATA": [0, 1, 2]
+              }
+            ]
+          }]
+        });
+
+        // Wrap floating point values in a Float32Array and take them back out to
+        // make sure that equality checks will pass
+        const values = [
+            [new Float32Array([-0.3])[0], -1, 'a'],
+            [new Float32Array([-0.2])[0],  1, 'b'],
+            [new Float32Array([-0.1])[0], -1, 'c'],
+            [new Float32Array([ 0  ])[0],  1, 'a'],
+            [new Float32Array([ 0.1])[0], -1, 'b'],
+            [new Float32Array([ 0.2])[0],  1, 'c'],
+            [new Float32Array([ 0.3])[0], -1, 'a'],
+            [new Float32Array([ 0.2])[0],  1, 'b'],
+            [new Float32Array([ 0.1])[0], -1, 'c'],
+        ]
+        test(`has the correct length`, () => {
+            expect(table.length).toEqual(values.length);
+        });
+        test(`gets expected values`, () => {
+            for (let i = -1; ++i < values.length;) {
+                expect(table.get(i).toArray()).toEqual(values[i]);
+            }
+        });
+        test(`iterates expected values`, () => {
+            let i = 0;
+            for (let row of table) {
+                expect(row.toArray()).toEqual(values[i++]);
+            }
+        });
+        test(`scans expected values`, () => {
+            let expected_idx = 0;
+            table.scan((idx, cols) => {
+                expect(cols.map((c)=>c.get(idx))).toEqual(values[expected_idx++]);
+            });
+        });
+        test(`count() returns the correct length`, () => {
+            expect(table.count()).toEqual(values.length);
+        });
+        test(`filter on f32 >= 0 returns the correct length`, () => {
+            expect(table.filter(col('f32').gteq(0)).count()).toEqual(6);
+        });
+        test(`filter on i32 <= 0 returns the correct length`, () => {
+            expect(table.filter(col('i32').lteq(0)).count()).toEqual(5);
+        });
+        test(`filter on dictionary == 'a' returns the correct length`, () => {
+            expect(table.filter(col('dictionary').eq('a')).count()).toEqual(3);
+        });
+        test(`countBy on dictionary returns the correct counts`, () => {
+            expect(table.countBy(col('dictionary')).asJSON()).toEqual({
+                'a': 3,
+                'b': 3,
+                'c': 3,
+            });
+        });
+        test(`countBy on dictionary with filter returns the correct counts`, () => {
+            expect(table.filter(col('i32').eq(1)).countBy(col('dictionary')).asJSON()).toEqual({
+                'a': 1,
+                'b': 2,
+                'c': 1,
+            });
+        });
+    });
+});

From 20717d59379b6c5f590e0a6771a3e0818ddc3de8 Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Mon, 15 Jan 2018 17:53:22 -0500
Subject: [PATCH 17/19] Fixed countBy(string)

---
 js/perf/index.js            |  2 +-
 js/src/table.ts             |  4 ++--
 js/test/unit/table-tests.ts | 16 +++++++++++++++-
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/js/perf/index.js b/js/perf/index.js
index d31b6430ec871..29d5edf56de8e 100644
--- a/js/perf/index.js
+++ b/js/perf/index.js
@@ -180,7 +180,7 @@ function createDataFrameCountByTest(table, column) {
         async: true,
         name: `name: '${column}', length: ${table.length}, type: ${table.columns[colidx].type}`,
         fn() {
-            table.countBy(col(column));
+            table.countBy(column);
         }
     };
 }
diff --git a/js/src/table.ts b/js/src/table.ts
index f00b5ef9da1df..554844be2c8c4 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -115,7 +115,7 @@ export class Table implements DataFrame {
         return this.lengths.reduce((acc, val) => acc + val);
     }
     countBy(count_by: (Col|string)): CountByResult {
-        if (count_by instanceof String) {
+        if (!(count_by instanceof Col)) {
             count_by = new Col(count_by);
         }
 
@@ -216,7 +216,7 @@ class FilteredDataFrame implements DataFrame {
     }
 
     countBy(count_by: (Col|string)): CountByResult {
-        if (count_by instanceof String) {
+        if (!(count_by instanceof Col)) {
             count_by = new Col(count_by);
         }
 
diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts
index 33fb2d178b0d2..9b19c9e56a243 100644
--- a/js/test/unit/table-tests.ts
+++ b/js/test/unit/table-tests.ts
@@ -159,14 +159,21 @@ describe(`Table`, () => {
             expect(table.filter(col('dictionary').eq('a')).count()).toEqual(3);
         });
         test(`countBy on dictionary returns the correct counts`, () => {
+            // Make sure countBy works both with and without the Col wrapper
+            // class
             expect(table.countBy(col('dictionary')).asJSON()).toEqual({
                 'a': 3,
                 'b': 2,
                 'c': 2,
             });
+            expect(table.countBy('dictionary').asJSON()).toEqual({
+                'a': 3,
+                'b': 2,
+                'c': 2,
+            });
         });
         test(`countBy on dictionary with filter returns the correct counts`, () => {
-            expect(table.filter(col('i32').eq(1)).countBy(col('dictionary')).asJSON()).toEqual({
+            expect(table.filter(col('i32').eq(1)).countBy('dictionary').asJSON()).toEqual({
                 'a': 1,
                 'b': 1,
                 'c': 1,
@@ -354,11 +361,18 @@ describe(`Table`, () => {
             expect(table.filter(col('dictionary').eq('a')).count()).toEqual(3);
         });
         test(`countBy on dictionary returns the correct counts`, () => {
+            // Make sure countBy works both with and without the Col wrapper
+            // class
             expect(table.countBy(col('dictionary')).asJSON()).toEqual({
                 'a': 3,
                 'b': 3,
                 'c': 3,
             });
+            expect(table.countBy('dictionary').asJSON()).toEqual({
+                'a': 3,
+                'b': 3,
+                'c': 3,
+            });
         });
         test(`countBy on dictionary with filter returns the correct counts`, () => {
             expect(table.filter(col('i32').eq(1)).countBy(col('dictionary')).asJSON()).toEqual({

From edcbdbed1f45524561e362b9eb4a7ce5c88f0ecc Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Tue, 16 Jan 2018 10:37:33 -0500
Subject: [PATCH 18/19] cleanup

---
 js/src/table.ts             |  49 +++---
 js/test/unit/table-tests.ts | 290 ++++++++++++++++++------------------
 2 files changed, 175 insertions(+), 164 deletions(-)

diff --git a/js/src/table.ts b/js/src/table.ts
index 554844be2c8c4..d4fe5a93223d8 100644
--- a/js/src/table.ts
+++ b/js/src/table.ts
@@ -54,14 +54,17 @@ function columnsFromBatches(batches: Vector[][]) {
 
 export class Table implements DataFrame {
     static from(sources?: Iterable<Uint8Array | Buffer | string> | object | string) {
-        let batches: Vector<any>[][] = [[]];
+        let batches: Vector[][] = [];
         if (sources) {
-            batches = Array.from(read(sources));
+            batches = [];
+            for (let batch of read(sources)) {
+                batches.push(batch);
+            }
         }
         return new Table({ batches });
     }
     static async fromAsync(sources?: AsyncIterable<Uint8Array | Buffer | string>) {
-        let batches: Vector<any>[][] = [[]];
+        let batches: Vector[][] = [];
         if (sources) {
             batches = [];
             for await (let batch of readAsync(sources)) {
@@ -119,18 +122,17 @@ export class Table implements DataFrame {
             count_by = new Col(count_by);
         }
 
-        // the last batch will have the most complete dictionary, use it's data
-        // vector as our count by keys
+        // Assume that all dictionary batches are deltas, which means that the
+        // last record batch has the most complete dictionary
         count_by.bind(this.batches[this.batches.length - 1]);
         if (!(count_by.vector instanceof DictionaryVector)) {
-            throw new Error("countBy currently only supports dictionary-encoded columns");
+            throw new Error('countBy currently only supports dictionary-encoded columns');
         }
 
-        let keys: Vector = (count_by.vector as DictionaryVector<any>).data;
+        let data: Vector = (count_by.vector as DictionaryVector<any>).data;
         // TODO: Adjust array byte width based on overall length
         // (e.g. if this.length <= 255 use Uint8Array, etc...)
-        let counts: Uint32Array = new Uint32Array(keys.length);
-
+        let counts: Uint32Array = new Uint32Array(data.length);
 
         for (let batch = -1; ++batch < this.lengths.length;) {
             const length = this.lengths[batch];
@@ -138,15 +140,16 @@ export class Table implements DataFrame {
             // load batches
             const columns = this.batches[batch];
             count_by.bind(columns);
+            const keys: Vector = (count_by.vector as DictionaryVector<any>).keys;
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                let key = (count_by.vector as DictionaryVector<any>).getKey(idx)
+                let key = keys.get(idx);
                 if (key !== null) { counts[key]++; }
             }
         }
 
-        return new CountByResult(keys, new Uint32Vector({data: counts}))
+        return new CountByResult(data, new Uint32Vector({data: counts}));
     }
     *[Symbol.iterator]() {
         for (let batch = -1; ++batch < this.lengths.length;) {
@@ -220,16 +223,17 @@ class FilteredDataFrame implements DataFrame {
             count_by = new Col(count_by);
         }
 
-        // the last batch will have the most complete dictionary, use it's data
-        // vector as our count by keys
+        // Assume that all dictionary batches are deltas, which means that the
+        // last record batch has the most complete dictionary
         count_by.bind(this.parent.batches[this.parent.batches.length - 1]);
         if (!(count_by.vector instanceof DictionaryVector)) {
-            throw new Error("countBy currently only supports dictionary-encoded columns");
+            throw new Error('countBy currently only supports dictionary-encoded columns');
         }
 
-        let keys: Vector = (count_by.vector as DictionaryVector<any>).data;
-        let counts: Uint32Array = new Uint32Array(keys.length);
-
+        const data: Vector = (count_by.vector as DictionaryVector<any>).data;
+        // TODO: Adjust array byte width based on overall length
+        // (e.g. if this.length <= 255 use Uint8Array, etc...)
+        const counts: Uint32Array = new Uint32Array(data.length);
 
         for (let batch = -1; ++batch < this.parent.lengths.length;) {
             const length = this.parent.lengths[batch];
@@ -238,28 +242,29 @@ class FilteredDataFrame implements DataFrame {
             const columns = this.parent.batches[batch];
             const predicate = this.predicate.bind(columns);
             count_by.bind(columns);
+            const keys: Vector = (count_by.vector as DictionaryVector<any>).keys;
 
             // yield all indices
             for (let idx = -1; ++idx < length;) {
-                let key = (count_by.vector as DictionaryVector<any>).getKey(idx)
+                let key = keys.get(idx);
                 if (key !== null && predicate(idx, columns)) { counts[key]++; }
             }
         }
 
-        return new CountByResult(keys, new Uint32Vector({data: counts}))
+        return new CountByResult(data, new Uint32Vector({data: counts}));
     }
 }
 
 export class CountByResult extends Table implements DataFrame {
-    constructor(readonly keys: Vector, readonly counts: Vector<number|null>) {
-        super({batches: [[keys, counts]]});
+    constructor(readonly values: Vector, readonly counts: Vector<number|null>) {
+        super({batches: [[values, counts]]});
     }
 
     asJSON(): Object {
         let result: {[key: string]: number|null} = {};
 
         for (let i = -1; ++i < this.length;) {
-            result[this.keys.get(i)] = this.counts.get(i);
+            result[this.values.get(i)] = this.counts.get(i);
         }
 
         return result;
diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts
index 9b19c9e56a243..2b818d7ff70ea 100644
--- a/js/test/unit/table-tests.ts
+++ b/js/test/unit/table-tests.ts
@@ -26,90 +26,90 @@ const {
 describe(`Table`, () => {
     describe(`single record batch`, () => {
         const table = Table.from({
-          "schema": {
-            "fields": [
+          'schema': {
+            'fields': [
               {
-                "name": "f32",
-                "type": {
-                  "name": "floatingpoint",
-                  "precision": "SINGLE"
+                'name': 'f32',
+                'type': {
+                  'name': 'floatingpoint',
+                  'precision': 'SINGLE'
                 },
-                "nullable": false,
-                "children": [],
+                'nullable': false,
+                'children': [],
               },
               {
-                "name": "i32",
-                "type": {
-                  "name": "int",
-                  "isSigned": true,
-                  "bitWidth": 32
+                'name': 'i32',
+                'type': {
+                  'name': 'int',
+                  'isSigned': true,
+                  'bitWidth': 32
                 },
-                "nullable": false,
-                "children": [],
+                'nullable': false,
+                'children': [],
               },
               {
-                "name": "dictionary",
-                "type": {
-                  "name": "utf8"
+                'name': 'dictionary',
+                'type': {
+                  'name': 'utf8'
                 },
-                "nullable": false,
-                "children": [],
-                "dictionary": {
-                  "id": 0,
-                  "indexType": {
-                    "name": "int",
-                    "isSigned": true,
-                    "bitWidth": 8
+                'nullable': false,
+                'children': [],
+                'dictionary': {
+                  'id': 0,
+                  'indexType': {
+                    'name': 'int',
+                    'isSigned': true,
+                    'bitWidth': 8
                   },
-                  "isOrdered": false
+                  'isOrdered': false
                 }
               }
             ]
           },
-          "dictionaries": [{
-            "id": 0,
-            "data": {
-              "count": 3,
-              "columns": [
+          'dictionaries': [{
+            'id': 0,
+            'data': {
+              'count': 3,
+              'columns': [
                 {
-                  "name": "DICT0",
-                  "count": 3,
-                  "VALIDITY": [],
-                  "OFFSET": [
+                  'name': 'DICT0',
+                  'count': 3,
+                  'VALIDITY': [],
+                  'OFFSET': [
                     0,
                     1,
                     2,
                     3
                   ],
-                  "DATA": [
-                    "a",
-                    "b",
-                    "c",
+                  'DATA': [
+                    'a',
+                    'b',
+                    'c',
                   ]
                 }
               ]
             }
           }],
-          "batches": [{
-            "count": 7,
-            "columns": [
+          'batches': [{
+            'count': 7,
+            'columns': [
               {
-                "name": "f32",
-                "count": 7,
-                "VALIDITY": [],
-                "DATA": [-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3]
+                'name': 'f32',
+                'count': 7,
+                'VALIDITY': [],
+                'DATA': [-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3]
               },
               {
-                "name": "i32",
-                "count": 7,
-                "VALIDITY": [],
-                "DATA": [-1, 1, -1, 1, -1, 1, -1]
+                'name': 'i32',
+                'count': 7,
+                'VALIDITY': [],
+                'DATA': [-1, 1, -1, 1, -1, 1, -1]
               },
               {
-                "name": "dictionary",
-                "count": 7,
-                "VALIDITY": [],
-                "DATA": [0, 1, 2, 0, 1, 2, 0]
+                'name': 'dictionary',
+                'count': 7,
+                'VALIDITY': [],
+                'DATA': [0, 1, 2, 0, 1, 2, 0]
               }
             ]
           }]
@@ -125,7 +125,7 @@ describe(`Table`, () => {
             [new Float32Array([ 0.1])[0], -1, 'b'],
             [new Float32Array([ 0.2])[0],  1, 'c'],
             [new Float32Array([ 0.3])[0], -1, 'a']
-        ]
+        ];
         test(`has the correct length`, () => {
             expect(table.length).toEqual(values.length);
         });
@@ -143,7 +143,7 @@ describe(`Table`, () => {
         test(`scans expected values`, () => {
             let expected_idx = 0;
             table.scan((idx, cols) => {
-                expect(cols.map((c)=>c.get(idx))).toEqual(values[expected_idx++]);
+                expect(cols.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
             });
         });
         test(`count() returns the correct length`, () => {
@@ -179,137 +179,140 @@ describe(`Table`, () => {
                 'c': 1,
             });
         });
+        test(`countBy on non dictionary column throws error`, () => {
+            expect(() => { table.countBy('i32'); }).toThrow();
+        });
     });
     describe(`multiple record batches`, () => {
         const table = Table.from({
-          "schema": {
-            "fields": [
+          'schema': {
+            'fields': [
               {
-                "name": "f32",
-                "type": {
-                  "name": "floatingpoint",
-                  "precision": "SINGLE"
+                'name': 'f32',
+                'type': {
+                  'name': 'floatingpoint',
+                  'precision': 'SINGLE'
                 },
-                "nullable": false,
-                "children": [],
+                'nullable': false,
+                'children': [],
               },
               {
-                "name": "i32",
-                "type": {
-                  "name": "int",
-                  "isSigned": true,
-                  "bitWidth": 32
+                'name': 'i32',
+                'type': {
+                  'name': 'int',
+                  'isSigned': true,
+                  'bitWidth': 32
                 },
-                "nullable": false,
-                "children": [],
+                'nullable': false,
+                'children': [],
               },
               {
-                "name": "dictionary",
-                "type": {
-                  "name": "utf8"
+                'name': 'dictionary',
+                'type': {
+                  'name': 'utf8'
                 },
-                "nullable": false,
-                "children": [],
-                "dictionary": {
-                  "id": 0,
-                  "indexType": {
-                    "name": "int",
-                    "isSigned": true,
-                    "bitWidth": 8
+                'nullable': false,
+                'children': [],
+                'dictionary': {
+                  'id': 0,
+                  'indexType': {
+                    'name': 'int',
+                    'isSigned': true,
+                    'bitWidth': 8
                   },
-                  "isOrdered": false
+                  'isOrdered': false
                 }
               }
             ]
           },
-          "dictionaries": [{
-            "id": 0,
-            "data": {
-              "count": 3,
-              "columns": [
+          'dictionaries': [{
+            'id': 0,
+            'data': {
+              'count': 3,
+              'columns': [
                 {
-                  "name": "DICT0",
-                  "count": 3,
-                  "VALIDITY": [],
-                  "OFFSET": [
+                  'name': 'DICT0',
+                  'count': 3,
+                  'VALIDITY': [],
+                  'OFFSET': [
                     0,
                     1,
                     2,
                     3
                   ],
-                  "DATA": [
-                    "a",
-                    "b",
-                    "c",
+                  'DATA': [
+                    'a',
+                    'b',
+                    'c',
                   ]
                 }
               ]
             }
           }],
-          "batches": [{
-            "count": 3,
-            "columns": [
+          'batches': [{
+            'count': 3,
+            'columns': [
               {
-                "name": "f32",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [-0.3, -0.2, -0.1]
+                'name': 'f32',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [-0.3, -0.2, -0.1]
               },
               {
-                "name": "i32",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [-1, 1, -1]
+                'name': 'i32',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [-1, 1, -1]
               },
               {
-                "name": "dictionary",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [0, 1, 2]
+                'name': 'dictionary',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [0, 1, 2]
               }
             ]
           }, {
-            "count": 3,
-            "columns": [
+            'count': 3,
+            'columns': [
               {
-                "name": "f32",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [0, 0.1, 0.2]
+                'name': 'f32',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [0, 0.1, 0.2]
               },
               {
-                "name": "i32",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [1, -1, 1]
+                'name': 'i32',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [1, -1, 1]
               },
               {
-                "name": "dictionary",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [0, 1, 2]
+                'name': 'dictionary',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [0, 1, 2]
               }
             ]
           }, {
-            "count": 3,
-            "columns": [
+            'count': 3,
+            'columns': [
               {
-                "name": "f32",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [0.3, 0.2, 0.1]
+                'name': 'f32',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [0.3, 0.2, 0.1]
               },
               {
-                "name": "i32",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [-1, 1, -1]
+                'name': 'i32',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [-1, 1, -1]
               },
               {
-                "name": "dictionary",
-                "count": 3,
-                "VALIDITY": [],
-                "DATA": [0, 1, 2]
+                'name': 'dictionary',
+                'count': 3,
+                'VALIDITY': [],
+                'DATA': [0, 1, 2]
               }
             ]
           }]
@@ -327,7 +330,7 @@ describe(`Table`, () => {
             [new Float32Array([ 0.3])[0], -1, 'a'],
             [new Float32Array([ 0.2])[0],  1, 'b'],
             [new Float32Array([ 0.1])[0], -1, 'c'],
-        ]
+        ];
         test(`has the correct length`, () => {
             expect(table.length).toEqual(values.length);
         });
@@ -345,7 +348,7 @@ describe(`Table`, () => {
         test(`scans expected values`, () => {
             let expected_idx = 0;
             table.scan((idx, cols) => {
-                expect(cols.map((c)=>c.get(idx))).toEqual(values[expected_idx++]);
+                expect(cols.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
             });
         });
         test(`count() returns the correct length`, () => {
@@ -381,5 +384,8 @@ describe(`Table`, () => {
                 'c': 1,
             });
         });
+        test(`countBy on non dictionary column throws error`, () => {
+            expect(() => { table.countBy('i32'); }).toThrow();
+        });
     });
 });

From e20decd57f92cc6c1fc1e807f11f7bb9d825ebde Mon Sep 17 00:00:00 2001
From: Brian Hulette <brian.hulette@ccri.com>
Date: Tue, 16 Jan 2018 11:11:23 -0500
Subject: [PATCH 19/19] Add license headers

---
 js/src/predicate.ts             | 17 +++++++++++++++++
 js/test/data/tables/generate.py | 17 +++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/js/src/predicate.ts b/js/src/predicate.ts
index 1fedc98d1c41e..a80e56ee599e5 100644
--- a/js/src/predicate.ts
+++ b/js/src/predicate.ts
@@ -1,3 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
 import { Vector } from './vector/vector';
 import { DictionaryVector } from './vector/dictionary';
 
diff --git a/js/test/data/tables/generate.py b/js/test/data/tables/generate.py
index bf663fb0b1f9f..da19c6a0728c0 100644
--- a/js/test/data/tables/generate.py
+++ b/js/test/data/tables/generate.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import pyarrow as pa
 import random
 import numpy as np