From 61dc69902761014e23e56361996acc2215c8f283 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Thu, 11 Jan 2018 02:57:56 -0800 Subject: [PATCH] WIP -- refactor types to closer match arrow-cpp --- js/src/Arrow.externs.ts | 84 ---- js/src/Arrow.ts | 123 ----- js/src/data.ts | 214 +++++++++ js/src/{format => }/fb/File.ts | 0 js/src/{format => }/fb/File_generated.js | 0 js/src/{format => }/fb/Message.ts | 0 js/src/{format => }/fb/Message_generated.js | 0 js/src/{format => }/fb/Schema.ts | 0 js/src/{format => }/fb/Schema_generated.js | 0 js/src/format/arrow.ts | 32 -- js/src/format/fb.ts | 234 --------- js/src/format/json.ts | 173 ------- js/src/format/types.ts | 393 --------------- js/src/ipc/message.ts | 156 ++++++ js/src/reader/arrow.ts | 48 -- js/src/reader/buffer.ts | 229 --------- js/src/reader/json.ts | 176 ------- js/src/reader/vector.ts | 255 ---------- js/src/type.ts | 501 ++++++++++++++++++++ js/src/util/bit.ts | 107 +++++ js/src/util/layout.ts | 2 +- js/src/vector.ts | 257 ++++++++++ js/src/vector/arrow.ts | 88 ---- js/src/vector/dictionary.ts | 41 +- js/src/vector/flat.ts | 173 +++++++ js/src/vector/list.ts | 95 ++-- js/src/vector/nested.ts | 132 ++++++ js/src/vector/numeric.ts | 169 ------- js/src/vector/struct.ts | 127 ----- js/src/vector/table.ts | 61 --- js/src/vector/traits/field.ts | 71 --- js/src/vector/traits/mixins.ts | 75 --- js/src/vector/traits/nullable.ts | 70 --- js/src/vector/traits/nullablefield.ts | 71 --- js/src/vector/traits/vectors.ts | 75 --- js/src/vector/types.ts | 43 -- js/src/vector/utf8.ts | 40 -- js/src/vector/vector.ts | 55 --- js/src/vector/virtual.ts | 129 ----- js/src/visitor.ts | 154 ++++++ 40 files changed, 1765 insertions(+), 2888 deletions(-) delete mode 100644 js/src/Arrow.externs.ts delete mode 100644 js/src/Arrow.ts create mode 100644 js/src/data.ts rename js/src/{format => }/fb/File.ts (100%) rename js/src/{format => }/fb/File_generated.js (100%) rename js/src/{format => }/fb/Message.ts (100%) rename js/src/{format => 
}/fb/Message_generated.js (100%) rename js/src/{format => }/fb/Schema.ts (100%) rename js/src/{format => }/fb/Schema_generated.js (100%) delete mode 100644 js/src/format/arrow.ts delete mode 100644 js/src/format/fb.ts delete mode 100644 js/src/format/json.ts delete mode 100644 js/src/format/types.ts create mode 100644 js/src/ipc/message.ts delete mode 100644 js/src/reader/arrow.ts delete mode 100644 js/src/reader/buffer.ts delete mode 100644 js/src/reader/json.ts delete mode 100644 js/src/reader/vector.ts create mode 100644 js/src/type.ts create mode 100644 js/src/util/bit.ts create mode 100644 js/src/vector.ts delete mode 100644 js/src/vector/arrow.ts create mode 100644 js/src/vector/flat.ts create mode 100644 js/src/vector/nested.ts delete mode 100644 js/src/vector/numeric.ts delete mode 100644 js/src/vector/struct.ts delete mode 100644 js/src/vector/table.ts delete mode 100644 js/src/vector/traits/field.ts delete mode 100644 js/src/vector/traits/mixins.ts delete mode 100644 js/src/vector/traits/nullable.ts delete mode 100644 js/src/vector/traits/nullablefield.ts delete mode 100644 js/src/vector/traits/vectors.ts delete mode 100644 js/src/vector/types.ts delete mode 100644 js/src/vector/utf8.ts delete mode 100644 js/src/vector/vector.ts delete mode 100644 js/src/vector/virtual.ts create mode 100644 js/src/visitor.ts diff --git a/js/src/Arrow.externs.ts b/js/src/Arrow.externs.ts deleted file mode 100644 index c23930271183d..0000000000000 --- a/js/src/Arrow.externs.ts +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/** - * @fileoverview Closure Compiler externs for Arrow - * @externs - * @suppress {duplicate,checkTypes} - */ -/** @type {symbol} */ -Symbol.iterator; -/** @type {symbol} */ -Symbol.asyncIterator; - -let RowVector = function() {}; -/** @type {?} */ -RowVector.prototype.toJSON; -/** @type {?} */ -RowVector.prototype.toArray; -/** @type {?} */ -RowVector.prototype.toObject; -/** @type {?} */ -RowVector.prototype.toString; - -let Table = function() {}; -/** @type {?} */ -( Table).from; -/** @type {?} */ -Table.prototype.columns; -/** @type {?} */ -Table.prototype.length; -/** @type {?} */ -Table.prototype.col; -/** @type {?} */ -Table.prototype.key; -/** @type {?} */ -Table.prototype.select; -/** @type {?} */ -Table.prototype.toString; - -let Vector = function() {}; -/** @type {?} */ -Vector.prototype.length; -/** @type {?} */ -Vector.prototype.name; -/** @type {?} */ -Vector.prototype.type; -/** @type {?} */ -Vector.prototype.get; -/** @type {?} */ -Vector.prototype.concat; -/** @type {?} */ -Vector.prototype.slice; -/** @type {?} */ -Vector.prototype.metadata; -/** @type {?} */ -Vector.prototype.nullable; -/** @type {?} */ -Vector.prototype.nullCount; - -let BoolVector = function() {}; -/** @type {?} */ -( BoolVector).pack; -/** @type {?} */ -BoolVector.prototype.set; - -let DictionaryVector = function() {}; -/** @type {?} */ -DictionaryVector.prototype.getKey; -/** @type {?} */ -DictionaryVector.prototype.getValue; diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts deleted file mode 100644 index 3a8943434eece..0000000000000 --- a/js/src/Arrow.ts 
+++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { Table } from './vector/table'; -import { Vector } from './vector/vector'; -import { Utf8Vector } from './vector/utf8'; -import { DictionaryVector } from './vector/dictionary'; -import { StructVector, StructRow } from './vector/struct'; -import { read, readAsync } from './reader/arrow'; -import { Uint64, Int64, Int128 } from './util/int'; -import { ListVector, BinaryVector, FixedSizeListVector } from './vector/list'; - -import { - BoolVector, - Int8Vector, - Int16Vector, - Int32Vector, - Int64Vector, - Uint8Vector, - Uint16Vector, - Uint32Vector, - Uint64Vector, - Float16Vector, - Float32Vector, - Float64Vector, - Date32Vector, - Date64Vector, - Time32Vector, - Time64Vector, - DecimalVector, - TimestampVector, -} from './vector/numeric'; - -// closure compiler always erases static method names: -// https://github.com/google/closure-compiler/issues/1776 -// set them via string indexers to save them from the mangler -Table['from'] = Table.from; -Table['fromAsync'] = Table.fromAsync; -BoolVector['pack'] = BoolVector.pack; - -export { read, readAsync }; -export { Table, Vector, StructRow }; -export { Uint64, Int64, Int128 }; 
-export { NumericVectorConstructor } from './vector/numeric'; -export { List, TypedArray, TypedArrayConstructor } from './vector/types'; -export { - BoolVector, - ListVector, - Utf8Vector, - Int8Vector, - Int16Vector, - Int32Vector, - Int64Vector, - Uint8Vector, - Uint16Vector, - Uint32Vector, - Uint64Vector, - Date32Vector, - Date64Vector, - Time32Vector, - Time64Vector, - BinaryVector, - StructVector, - Float16Vector, - Float32Vector, - Float64Vector, - DecimalVector, - TimestampVector, - DictionaryVector, - FixedSizeListVector, -}; - -/* These exports are needed for the closure umd targets */ -try { - const Arrow = eval('exports'); - if (typeof Arrow === 'object') { - // string indexers tell closure compiler not to rename these properties - Arrow['read'] = read; - Arrow['readAsync'] = readAsync; - Arrow['Table'] = Table; - Arrow['Vector'] = Vector; - Arrow['StructRow'] = StructRow; - Arrow['BoolVector'] = BoolVector; - Arrow['ListVector'] = ListVector; - Arrow['Utf8Vector'] = Utf8Vector; - Arrow['Int8Vector'] = Int8Vector; - Arrow['Int16Vector'] = Int16Vector; - Arrow['Int32Vector'] = Int32Vector; - Arrow['Int64Vector'] = Int64Vector; - Arrow['Uint8Vector'] = Uint8Vector; - Arrow['Uint16Vector'] = Uint16Vector; - Arrow['Uint32Vector'] = Uint32Vector; - Arrow['Uint64Vector'] = Uint64Vector; - Arrow['Date32Vector'] = Date32Vector; - Arrow['Date64Vector'] = Date64Vector; - Arrow['Time32Vector'] = Time32Vector; - Arrow['Time64Vector'] = Time64Vector; - Arrow['BinaryVector'] = BinaryVector; - Arrow['StructVector'] = StructVector; - Arrow['Float16Vector'] = Float16Vector; - Arrow['Float32Vector'] = Float32Vector; - Arrow['Float64Vector'] = Float64Vector; - Arrow['DecimalVector'] = DecimalVector; - Arrow['TimestampVector'] = TimestampVector; - Arrow['DictionaryVector'] = DictionaryVector; - Arrow['FixedSizeListVector'] = FixedSizeListVector; - } -} catch (e) { /* not the UMD bundle */ } -/* end closure exports */ diff --git a/js/src/data.ts b/js/src/data.ts new file 
mode 100644 index 0000000000000..5e7fd35431b26 --- /dev/null +++ b/js/src/data.ts @@ -0,0 +1,214 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { VectorLike } from './vector'; +import { VectorType, TypedArray, TypedArrayConstructor } from './type'; +import { DataType, FlatType, ListType, NestedType, Map_, DenseUnion, SparseUnion } from './type'; + +export type Data = DataTypes[T['TType']] & BaseData; +export interface DataTypes { +/* [Type.NONE]*/ 0: BaseData; +/* [Type.Null]*/ 1: FlatData; +/* [Type.Int]*/ 2: FlatData; +/* [Type.Float]*/ 3: FlatData; +/* [Type.Binary]*/ 4: ListData; +/* [Type.Utf8]*/ 5: ListData; +/* [Type.Bool]*/ 6: FlatData; +/* [Type.Decimal]*/ 7: FlatData; +/* [Type.Date]*/ 8: FlatData; +/* [Type.Time]*/ 9: FlatData; +/* [Type.Timestamp]*/ 10: FlatData; +/* [Type.Interval]*/ 11: FlatData; +/* [Type.List]*/ 12: ListData; +/* [Type.Struct]*/ 13: FlatData; +/* [Type.Union]*/ 14: UnionData; +/* [Type.FixedSizeBinary]*/ 15: FlatData; +/* [Type.FixedSizeList]*/ 16: ListData; +/* [Type.Map]*/ 17: NestedData; +/* [Type.DenseUnion]*/ DenseUnion: DenseUnionData; +/*[Type.SparseUnion]*/ SparseUnion: SparseUnionData; +} +// When slicing, we do not know the null count of the sliced range 
without +// doing some computation. To avoid doing this eagerly, we set the null count +// to -1 (any negative number will do). When Array::null_count is called the +// first time, the null count will be computed. See ARROW-33 +export type kUnknownNullCount = -1; +export const kUnknownNullCount = -1; + +export class BaseData implements VectorLike { + protected _type: T; + protected _length: number; + // @ts-ignore + protected _childData: BaseData[]; + protected _nullCount: number | kUnknownNullCount; + protected /* [VectorType.OFFSET]:*/ 0?: Int32Array; + protected /* [VectorType.DATA]:*/ 1?: T['TArray']; + protected /*[VectorType.VALIDITY]:*/ 2?: Uint8Array; + protected /* [VectorType.TYPE]:*/ 3?: Int8Array; + constructor(type: T, length: number, nullCount?: number) { + this._type = type; + this._length = Math.max(length || 0, 0); + this._nullCount = Math.max(nullCount || 0, -1); + } + public get type() { return this._type; } + public get length() { return this._length; } + public get typeId() { return this._type.TType; } + public get childData() { return this._childData; } + public get nullCount() { return this._nullCount; } + public get nullBitmap() { return this[VectorType.VALIDITY]; } + public clone(length = this._length, nullCount = this._nullCount) { + return new BaseData(this._type, length, nullCount) as this; + } + public slice(offset: number, length: number) { + return length <= 0 ? 
this : this.sliceInternal(this.clone( + length, +(this._nullCount === 0) - 1 + ), offset, length); + } + protected sliceInternal(clone: this, offset: number, length: number) { + let arr: any; + // If typeIds exist, slice the typeIds buffer + (arr = this[VectorType.TYPE]) && (clone[VectorType.TYPE] = this.sliceData(arr, offset, length)); + // If a null bitmap exists, slice the null bitmap + (arr = this[VectorType.VALIDITY]) && (clone[VectorType.VALIDITY] = this.sliceNullBitmap(arr, offset, length)); + // If offsets exist, only slice the offsets buffer + (arr = this[VectorType.OFFSET]) && (clone[VectorType.OFFSET] = this.sliceOffsets(arr, offset, length)) || + // Otherwise if no offsets, slice the data buffer + (arr = this[VectorType.DATA]) && (clone[VectorType.DATA] = this.sliceData(arr, offset, length)); + return clone; + } + protected sliceData(data: T['TArray'] & TypedArray, offset: number, length: number) { + return data.subarray(offset, offset + length); + } + protected sliceOffsets(valueOffsets: Int32Array, offset: number, length: number) { + return valueOffsets.subarray(offset, offset + length + 1); + } + protected sliceNullBitmap(nullBitmap: Uint8Array, offset: number, length: number) { + return length >= 8 + ? 
nullBitmap.subarray(offset >> 3, ((offset + length) >> 3)) + : nullBitmap.subarray(offset >> 3, ((offset + length) >> 3) + 1); + } +} + +export class FlatData extends BaseData { + public /* [VectorType.DATA]:*/ 1: T['TArray']; + public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; + public get values() { return this[VectorType.DATA]; } + constructor(type: T, length: number, nullBitmap: Uint8Array, data: Iterable, nullCount?: number) { + super(type, length, nullCount); + this[VectorType.DATA] = toTypedArray(this.ArrayType, data); + this[VectorType.VALIDITY] = toTypedArray(Uint8Array, nullBitmap); + } + public get ArrayType(): T['ArrayType'] { return this._type.ArrayType; } + public clone(length = this._length, nullCount = this._nullCount) { + return new FlatData( + this._type, length, this[VectorType.VALIDITY], + this[VectorType.DATA], nullCount + ) as this; + } +} + +export class ListData extends BaseData { + public /* [VectorType.OFFSET]:*/ 0: Int32Array; + public /* [VectorType.DATA]:*/ 1: T['TArray']; + public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; + public get values() { return this[VectorType.DATA]; } + public get valueOffsets() { return this[VectorType.OFFSET]; } + constructor(type: T, length: number, nullBitmap: Uint8Array, data: T['TArray'], valueOffsets: Iterable, nullCount?: number) { + super(type, length, nullCount); + this[VectorType.DATA] = data; + this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets); + this[VectorType.VALIDITY] = toTypedArray(Uint8Array, nullBitmap); + } + public clone(length = this._length, nullCount = this._nullCount) { + return new ListData( + this._type, length, this[VectorType.VALIDITY], + this[VectorType.DATA], this[VectorType.OFFSET], + nullCount + ) as this; + } +} + +export class NestedData extends BaseData { + public /*[VectorType.VALIDITY]:*/ 2: Uint8Array; + constructor(type: T, length: number, nullBitmap: Uint8Array, childData: BaseData[], nullCount?: number) { + super(type, length, nullCount); + 
this._childData = childData; + this[VectorType.VALIDITY] = nullBitmap; + } + public clone(length = this._length, nullCount = this._nullCount) { + return new NestedData( + this._type, length, + this[VectorType.VALIDITY], + this._childData, nullCount + ) as this; + } + protected sliceInternal(clone: this, offset: number, length: number) { + if (!this[VectorType.OFFSET]) { + clone._childData = this._childData.map((child) => child.slice(offset, length)); + } + return super.sliceInternal(clone, offset, length); + } +} + +export class UnionData extends NestedData { + public /* [VectorType.TYPE]:*/ 3: T['TArray']; + public get typeIds() { return this[VectorType.TYPE]; } + constructor(type: T, length: number, nullBitmap: Uint8Array, typeIds: Iterable, childData: BaseData[], nullCount?: number) { + super(type, length, nullBitmap, childData, nullCount); + this[VectorType.TYPE] = toTypedArray(Int8Array, typeIds); + } + public clone(length = this._length, nullCount = this._nullCount) { + return new UnionData( + this._type, length, this[VectorType.VALIDITY], + this[VectorType.TYPE], this._childData, nullCount + ) as this; + } +} + +export class DenseUnionData extends UnionData { + public /* [VectorType.OFFSET]:*/ 0: Int32Array; + public get valueOffsets() { return this[VectorType.OFFSET]; } + constructor(type: DenseUnion, length: number, nullBitmap: Uint8Array, typeIds: Iterable, valueOffsets: Iterable, childData: BaseData[], nullCount?: number) { + super(type, length, nullBitmap, typeIds, childData, nullCount); + this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets); + } + public clone(length = this._length, nullCount = this._nullCount) { + return new DenseUnionData( + this._type, length, this[VectorType.VALIDITY], + this[VectorType.TYPE], this[VectorType.OFFSET], + this._childData, nullCount + ) as this; + } +} + +export class SparseUnionData extends UnionData { + constructor(type: SparseUnion, length: number, nullBitmap: Uint8Array, typeIds: Iterable, childData: 
BaseData[], nullCount?: number) { + super(type, length, nullBitmap, typeIds, childData, nullCount); + } + public clone(length = this._length, nullCount = this._nullCount) { + return new SparseUnionData( + this._type, length, this[VectorType.VALIDITY], + this[VectorType.TYPE], this._childData, nullCount + ) as this; + } +} + +function toTypedArray(ArrayType: TypedArrayConstructor, values?: T | ArrayLike | Iterable | null): T { + return values instanceof ArrayType ? values + : !values || !ArrayBuffer.isView(values) ? ArrayType.from(values || []) + : new ArrayType(values.buffer, values.byteOffset, values.byteLength / ArrayType.BYTES_PER_ELEMENT); +} diff --git a/js/src/format/fb/File.ts b/js/src/fb/File.ts similarity index 100% rename from js/src/format/fb/File.ts rename to js/src/fb/File.ts diff --git a/js/src/format/fb/File_generated.js b/js/src/fb/File_generated.js similarity index 100% rename from js/src/format/fb/File_generated.js rename to js/src/fb/File_generated.js diff --git a/js/src/format/fb/Message.ts b/js/src/fb/Message.ts similarity index 100% rename from js/src/format/fb/Message.ts rename to js/src/fb/Message.ts diff --git a/js/src/format/fb/Message_generated.js b/js/src/fb/Message_generated.js similarity index 100% rename from js/src/format/fb/Message_generated.js rename to js/src/fb/Message_generated.js diff --git a/js/src/format/fb/Schema.ts b/js/src/fb/Schema.ts similarity index 100% rename from js/src/format/fb/Schema.ts rename to js/src/fb/Schema.ts diff --git a/js/src/format/fb/Schema_generated.js b/js/src/fb/Schema_generated.js similarity index 100% rename from js/src/format/fb/Schema_generated.js rename to js/src/fb/Schema_generated.js diff --git a/js/src/format/arrow.ts b/js/src/format/arrow.ts deleted file mode 100644 index 14adf9040a47f..0000000000000 --- a/js/src/format/arrow.ts +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import { footerFromByteBuffer, messageFromByteBuffer } from './fb'; -import { schemaFromJSON, recordBatchFromJSON, dictionaryBatchFromJSON } from './json'; -import { - IntBitWidth, TimeBitWidth, - VisitorNode, Visitor, Footer, Block, Message, Schema, RecordBatch, DictionaryBatch, Field, DictionaryEncoding, Buffer, FieldNode, - Null, Int, FloatingPoint, Binary, Bool, Utf8, Decimal, Date, Time, Timestamp, Interval, List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, -} from './types'; - -export { - IntBitWidth, TimeBitWidth, - footerFromByteBuffer, messageFromByteBuffer, - schemaFromJSON, recordBatchFromJSON, dictionaryBatchFromJSON, - VisitorNode, Visitor, Footer, Block, Message, Schema, RecordBatch, DictionaryBatch, Field, DictionaryEncoding, Buffer, FieldNode, - Null, Int, FloatingPoint, Binary, Bool, Utf8, Decimal, Date, Time, Timestamp, Interval, List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_ as Map, -}; diff --git a/js/src/format/fb.ts b/js/src/format/fb.ts deleted file mode 100644 index fdf7f7b0ed99a..0000000000000 --- a/js/src/format/fb.ts +++ /dev/null @@ -1,234 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import * as File_ from './fb/File'; -import * as Schema_ from './fb/Schema'; -import * as Message_ from './fb/Message'; -import { flatbuffers } from 'flatbuffers'; -import ByteBuffer = flatbuffers.ByteBuffer; -import Type = Schema_.org.apache.arrow.flatbuf.Type; -import MessageHeader = Message_.org.apache.arrow.flatbuf.MessageHeader; -import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; -import _Footer = File_.org.apache.arrow.flatbuf.Footer; -import _Block = File_.org.apache.arrow.flatbuf.Block; -import _Message = Message_.org.apache.arrow.flatbuf.Message; -import _Schema = Schema_.org.apache.arrow.flatbuf.Schema; -import _Field = Schema_.org.apache.arrow.flatbuf.Field; -import _RecordBatch = Message_.org.apache.arrow.flatbuf.RecordBatch; -import _DictionaryBatch = Message_.org.apache.arrow.flatbuf.DictionaryBatch; -import _FieldNode = Message_.org.apache.arrow.flatbuf.FieldNode; -import _Buffer = Schema_.org.apache.arrow.flatbuf.Buffer; -import _DictionaryEncoding = Schema_.org.apache.arrow.flatbuf.DictionaryEncoding; -import _Null = Schema_.org.apache.arrow.flatbuf.Null; -import _Int = Schema_.org.apache.arrow.flatbuf.Int; -import _FloatingPoint = Schema_.org.apache.arrow.flatbuf.FloatingPoint; -import _Binary = 
Schema_.org.apache.arrow.flatbuf.Binary; -import _Bool = Schema_.org.apache.arrow.flatbuf.Bool; -import _Utf8 = Schema_.org.apache.arrow.flatbuf.Utf8; -import _Decimal = Schema_.org.apache.arrow.flatbuf.Decimal; -import _Date = Schema_.org.apache.arrow.flatbuf.Date; -import _Time = Schema_.org.apache.arrow.flatbuf.Time; -import _Timestamp = Schema_.org.apache.arrow.flatbuf.Timestamp; -import _Interval = Schema_.org.apache.arrow.flatbuf.Interval; -import _List = Schema_.org.apache.arrow.flatbuf.List; -import _Struct = Schema_.org.apache.arrow.flatbuf.Struct_; -import _Union = Schema_.org.apache.arrow.flatbuf.Union; -import _FixedSizeBinary = Schema_.org.apache.arrow.flatbuf.FixedSizeBinary; -import _FixedSizeList = Schema_.org.apache.arrow.flatbuf.FixedSizeList; -import _Map = Schema_.org.apache.arrow.flatbuf.Map; - -import { - IntBitWidth, TimeBitWidth, - Footer, Block, Schema, RecordBatch, DictionaryBatch, Field, DictionaryEncoding, Buffer, FieldNode, - Null, Int, FloatingPoint, Binary, Bool, Utf8, Decimal, Date, Time, Timestamp, Interval, List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, -} from './types'; - -export function footerFromByteBuffer(bb: ByteBuffer) { - const f = _Footer.getRootAsFooter(bb), s = f.schema()!; - return new Footer( - dictionaryBatchesFromFooter(f), recordBatchesFromFooter(f), - new Schema(f.version(), fieldsFromSchema(s), customMetadata(s), s.endianness()) - ); -} - -export function messageFromByteBuffer(bb: ByteBuffer) { - const m = _Message.getRootAsMessage(bb)!, type = m.headerType(), version = m.version(); - switch (type) { - case MessageHeader.Schema: return schemaFromMessage(version, m.header(new _Schema())!); - case MessageHeader.RecordBatch: return recordBatchFromMessage(version, m.header(new _RecordBatch())!); - case MessageHeader.DictionaryBatch: return dictionaryBatchFromMessage(version, m.header(new _DictionaryBatch())!); - } - return null; - // throw new Error(`Unrecognized Message type '${type}'`); -} - -function 
schemaFromMessage(version: MetadataVersion, s: _Schema) { - return new Schema(version, fieldsFromSchema(s), customMetadata(s), s.endianness()); -} - -function recordBatchFromMessage(version: MetadataVersion, b: _RecordBatch) { - return new RecordBatch(version, b.length(), fieldNodesFromRecordBatch(b), buffersFromRecordBatch(b, version)); -} - -function dictionaryBatchFromMessage(version: MetadataVersion, d: _DictionaryBatch) { - return new DictionaryBatch(version, recordBatchFromMessage(version, d.data()!), d.id(), d.isDelta()); -} - -function dictionaryBatchesFromFooter(f: _Footer) { - const blocks = [] as Block[]; - for (let b: _Block, i = -1, n = f && f.dictionariesLength(); ++i < n;) { - if (b = f.dictionaries(i)!) { - blocks.push(new Block(b.metaDataLength(), b.bodyLength(), b.offset())); - } - } - return blocks; -} - -function recordBatchesFromFooter(f: _Footer) { - const blocks = [] as Block[]; - for (let b: _Block, i = -1, n = f && f.recordBatchesLength(); ++i < n;) { - if (b = f.recordBatches(i)!) 
{ - blocks.push(new Block(b.metaDataLength(), b.bodyLength(), b.offset())); - } - } - return blocks; -} - -function fieldsFromSchema(s: _Schema) { - const fields = [] as Field[]; - for (let i = -1, n = s && s.fieldsLength(); ++i < n;) { - fields.push(field(s.fields(i)!)); - } - return fields; -} - -function fieldsFromField(f: _Field) { - const fields = [] as Field[]; - for (let i = -1, n = f && f.childrenLength(); ++i < n;) { - fields.push(field(f.children(i)!)); - } - return fields; -} - -function fieldNodesFromRecordBatch(b: _RecordBatch) { - const fieldNodes = [] as FieldNode[]; - for (let i = -1, n = b.nodesLength(); ++i < n;) { - fieldNodes.push(fieldNodeFromRecordBatch(b.nodes(i)!)); - } - return fieldNodes; -} - -function buffersFromRecordBatch(b: _RecordBatch, version: MetadataVersion) { - const buffers = [] as Buffer[]; - for (let i = -1, n = b.buffersLength(); ++i < n;) { - let buffer = b.buffers(i)!; - // If this Arrow buffer was written before version 4, - // advance the buffer's bb_pos 8 bytes to skip past - // the now-removed page id field. 
- if (version < MetadataVersion.V4) { - buffer.bb_pos += (8 * (i + 1)); - } - buffers.push(bufferFromRecordBatch(buffer)); - } - return buffers; -} - -function field(f: _Field) { - return new Field( - f.name()!, - typeFromField(f), - f.typeType(), - f.nullable(), - fieldsFromField(f), - customMetadata(f), - dictionaryEncodingFromField(f) - ); -} - -function dictionaryEncodingFromField(f: _Field) { - let t: _Int | null; - let e: _DictionaryEncoding | null; - if (e = f.dictionary()) { - if (t = e.indexType()) { - return new DictionaryEncoding(new Int(t.isSigned(), t.bitWidth() as IntBitWidth), e.id(), e.isOrdered()); - } - return new DictionaryEncoding(null, e.id(), e.isOrdered()); - } - return undefined; -} - -function customMetadata(parent?: _Schema | _Field | null) { - const data = new Map(); - if (parent) { - for (let entry, key, i = -1, n = parent.customMetadataLength() | 0; ++i < n;) { - if ((entry = parent.customMetadata(i)) && (key = entry.key()) != null) { - data.set(key, entry.value()!); - } - } - } - return data; -} - -function fieldNodeFromRecordBatch(f: _FieldNode) { - return new FieldNode(f.length(), f.nullCount()); -} - -function bufferFromRecordBatch(b: _Buffer) { - return new Buffer(b.offset(), b.length()); -} - -function typeFromField(f: _Field) { - switch (f.typeType()) { - case Type.NONE: return nullFromField(f.type(new _Null())!); - case Type.Null: return nullFromField(f.type(new _Null())!); - case Type.Int: return intFromField(f.type(new _Int())!); - case Type.FloatingPoint: return floatingPointFromField(f.type(new _FloatingPoint())!); - case Type.Binary: return binaryFromField(f.type(new _Binary())!); - case Type.Utf8: return utf8FromField(f.type(new _Utf8())!); - case Type.Bool: return boolFromField(f.type(new _Bool())!); - case Type.Decimal: return decimalFromField(f.type(new _Decimal())!); - case Type.Date: return dateFromField(f.type(new _Date())!); - case Type.Time: return timeFromField(f.type(new _Time())!); - case Type.Timestamp: return 
timestampFromField(f.type(new _Timestamp())!); - case Type.Interval: return intervalFromField(f.type(new _Interval())!); - case Type.List: return listFromField(f.type(new _List())!); - case Type.Struct_: return structFromField(f.type(new _Struct())!); - case Type.Union: return unionFromField(f.type(new _Union())!); - case Type.FixedSizeBinary: return fixedSizeBinaryFromField(f.type(new _FixedSizeBinary())!); - case Type.FixedSizeList: return fixedSizeListFromField(f.type(new _FixedSizeList())!); - case Type.Map: return mapFromField(f.type(new _Map())!); - } - throw new Error(`Unrecognized type ${f.typeType()}`); -} - -function nullFromField(_type: _Null) { return new Null(); } -function intFromField(_type: _Int) { return new Int(_type.isSigned(), _type.bitWidth() as IntBitWidth); } -function floatingPointFromField(_type: _FloatingPoint) { return new FloatingPoint(_type.precision()); } -function binaryFromField(_type: _Binary) { return new Binary(); } -function utf8FromField(_type: _Utf8) { return new Utf8(); } -function boolFromField(_type: _Bool) { return new Bool(); } -function decimalFromField(_type: _Decimal) { return new Decimal(_type.scale(), _type.precision()); } -function dateFromField(_type: _Date) { return new Date(_type.unit()); } -function timeFromField(_type: _Time) { return new Time(_type.unit(), _type.bitWidth() as TimeBitWidth); } -function timestampFromField(_type: _Timestamp) { return new Timestamp(_type.unit(), _type.timezone()); } -function intervalFromField(_type: _Interval) { return new Interval(_type.unit()); } -function listFromField(_type: _List) { return new List(); } -function structFromField(_type: _Struct) { return new Struct(); } -function unionFromField(_type: _Union) { return new Union(_type.mode(), (_type.typeIdsArray() || []) as Type[]); } -function fixedSizeBinaryFromField(_type: _FixedSizeBinary) { return new FixedSizeBinary(_type.byteWidth()); } -function fixedSizeListFromField(_type: _FixedSizeList) { return new 
FixedSizeList(_type.listSize()); } -function mapFromField(_type: _Map) { return new Map_(_type.keysSorted()); } diff --git a/js/src/format/json.ts b/js/src/format/json.ts deleted file mode 100644 index 3da3db6d5fea3..0000000000000 --- a/js/src/format/json.ts +++ /dev/null @@ -1,173 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import * as Schema_ from './fb/Schema'; -import { flatbuffers } from 'flatbuffers'; -import Long = flatbuffers.Long; -import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; -import Type = Schema_.org.apache.arrow.flatbuf.Type; -import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit; -import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit; -import Precision = Schema_.org.apache.arrow.flatbuf.Precision; -import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit; -import { - IntBitWidth, TimeBitWidth, - Schema, RecordBatch, DictionaryBatch, Field, DictionaryEncoding, Buffer, FieldNode, - Null, Int, FloatingPoint, Binary, Bool, Utf8, Decimal, Date, Time, Timestamp, Interval, List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_, -} from './types'; - -export function schemaFromJSON(s: any): Schema { - // todo: metadataFromJSON - return new Schema( - MetadataVersion.V4, - fieldsFromJSON(s['fields']), - customMetadata(s['customMetadata']) - ); -} - -export function recordBatchFromJSON(b: any): RecordBatch { - return new RecordBatch( - MetadataVersion.V4, - new Long(b['count'], 0), - fieldNodesFromJSON(b['columns']), - buffersFromJSON(b['columns']) - ); -} - -export function dictionaryBatchFromJSON(b: any): DictionaryBatch { - return new DictionaryBatch( - MetadataVersion.V4, - recordBatchFromJSON(b['data']), - new Long(b['id'], 0), b['isDelta'] - ); -} - -function fieldsFromJSON(fs: any[]): Field[] { - return (fs || []).map(fieldFromJSON); -} - -function fieldNodesFromJSON(xs: any[]): FieldNode[] { - return (xs || []).reduce((fieldNodes, column: any) => [ - ...fieldNodes, - new FieldNode( - new Long(column['count'], 0), - new Long(nullCountFromJSON(column['VALIDITY']), 0) - ), - ...fieldNodesFromJSON(column['children']) - ], [] as FieldNode[]); -} - -function buffersFromJSON(xs: any[], buffers: Buffer[] = []): Buffer[] { - for (let i = -1, n = (xs || []).length; ++i < n;) { - const column = xs[i]; - column['VALIDITY'] && 
buffers.push(new Buffer(new Long(buffers.length, 0), new Long(column['VALIDITY'].length, 0))); - column['OFFSET'] && buffers.push(new Buffer(new Long(buffers.length, 0), new Long(column['OFFSET'].length, 0))); - column['DATA'] && buffers.push(new Buffer(new Long(buffers.length, 0), new Long(column['DATA'].length, 0))); - buffers = buffersFromJSON(column['children'], buffers); - } - return buffers; -} - -function nullCountFromJSON(validity: number[]) { - return (validity || []).reduce((sum, val) => sum + +(val === 0), 0); -} - -function fieldFromJSON(f: any) { - return new Field( - f['name'], - typeFromJSON(f['type']), - namesToTypeMap[f['type']['name']], - f.nullable, - fieldsFromJSON(f['children']), - customMetadata(f['customMetadata']), - dictionaryEncodingFromJSON(f['dictionary']) - ); -} - -function dictionaryEncodingFromJSON(d: any) { - return !d ? null : new DictionaryEncoding( - d.indexType ? intFromJSON(d.indexType) : null, - new Long(d.id, 0), d.isOrdered - ); -} - -function customMetadata(metadata?: any) { - return new Map(Object.entries(metadata || {})); -} - -const namesToTypeMap: { [n: string]: Type } = { - 'NONE': Type.NONE, - 'null': Type.Null, - 'int': Type.Int, - 'floatingpoint': Type.FloatingPoint, - 'binary': Type.Binary, - 'bool': Type.Bool, - 'utf8': Type.Utf8, - 'decimal': Type.Decimal, - 'date': Type.Date, - 'time': Type.Time, - 'timestamp': Type.Timestamp, - 'interval': Type.Interval, - 'list': Type.List, - 'struct': Type.Struct_, - 'union': Type.Union, - 'fixedsizebinary': Type.FixedSizeBinary, - 'fixedsizelist': Type.FixedSizeList, - 'map': Type.Map, -}; - -function typeFromJSON(t: any) { - switch (namesToTypeMap[t['name']]) { - case Type.NONE: return nullFromJSON(t); - case Type.Null: return nullFromJSON(t); - case Type.Int: return intFromJSON(t); - case Type.FloatingPoint: return floatingPointFromJSON(t); - case Type.Binary: return binaryFromJSON(t); - case Type.Utf8: return utf8FromJSON(t); - case Type.Bool: return boolFromJSON(t); - 
case Type.Decimal: return decimalFromJSON(t); - case Type.Date: return dateFromJSON(t); - case Type.Time: return timeFromJSON(t); - case Type.Timestamp: return timestampFromJSON(t); - case Type.Interval: return intervalFromJSON(t); - case Type.List: return listFromJSON(t); - case Type.Struct_: return structFromJSON(t); - case Type.Union: return unionFromJSON(t); - case Type.FixedSizeBinary: return fixedSizeBinaryFromJSON(t); - case Type.FixedSizeList: return fixedSizeListFromJSON(t); - case Type.Map: return mapFromJSON(t); - } - throw new Error(`Unrecognized type ${t['name']}`); -} - -function nullFromJSON(_type: any) { return new Null(); } -function intFromJSON(_type: any) { return new Int(_type['isSigned'], _type['bitWidth'] as IntBitWidth); } -function floatingPointFromJSON(_type: any) { return new FloatingPoint(Precision[_type['precision']] as any); } -function binaryFromJSON(_type: any) { return new Binary(); } -function utf8FromJSON(_type: any) { return new Utf8(); } -function boolFromJSON(_type: any) { return new Bool(); } -function decimalFromJSON(_type: any) { return new Decimal(_type['scale'], _type['precision']); } -function dateFromJSON(_type: any) { return new Date(DateUnit[_type['unit']] as any); } -function timeFromJSON(_type: any) { return new Time(TimeUnit[_type['unit']] as any, _type['bitWidth'] as TimeBitWidth); } -function timestampFromJSON(_type: any) { return new Timestamp(TimeUnit[_type['unit']] as any, _type['timezone']); } -function intervalFromJSON(_type: any) { return new Interval(IntervalUnit[_type['unit']] as any); } -function listFromJSON(_type: any) { return new List(); } -function structFromJSON(_type: any) { return new Struct(); } -function unionFromJSON(_type: any) { return new Union(_type['mode'], (_type['typeIdsArray'] || []) as Type[]); } -function fixedSizeBinaryFromJSON(_type: any) { return new FixedSizeBinary(_type['byteWidth']); } -function fixedSizeListFromJSON(_type: any) { return new FixedSizeList(_type['listSize']); } 
-function mapFromJSON(_type: any) { return new Map_(_type['keysSorted']); } diff --git a/js/src/format/types.ts b/js/src/format/types.ts deleted file mode 100644 index 09df8ccbbdf7c..0000000000000 --- a/js/src/format/types.ts +++ /dev/null @@ -1,393 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -/* tslint:disable:class-name */ - -import { align } from '../util/layout'; -import * as Schema_ from './fb/Schema'; -import * as Message_ from './fb/Message'; -import { flatbuffers } from 'flatbuffers'; -import Long = flatbuffers.Long; -import Type = Schema_.org.apache.arrow.flatbuf.Type; -import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit; -import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit; -import Precision = Schema_.org.apache.arrow.flatbuf.Precision; -import UnionMode = Schema_.org.apache.arrow.flatbuf.UnionMode; -import Endianness = Schema_.org.apache.arrow.flatbuf.Endianness; -import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit; -import MessageHeader = Message_.org.apache.arrow.flatbuf.MessageHeader; -import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; - -export type IntBitWidth = 8 | 16 | 32 | 64; -export type TimeBitWidth = IntBitWidth | 128; - -export interface VisitorNode { - accept(visitor: Visitor): any; -} - -export abstract class Visitor { - visit(node: VisitorNode): T { - return node.accept(this); - } - visitMany(nodes: VisitorNode[]): T[] { - return nodes.map((node) => this.visit(node)); - } - abstract visitFooter(node: Footer): any; - abstract visitBlock(node: Block): any; - abstract visitMessage(node: Message): any; - abstract visitSchema(node: Schema): any; - abstract visitField(node: Field): any; - abstract visitBuffer(node: Buffer): any; - abstract visitFieldNode(node: FieldNode): any; - abstract visitRecordBatch(node: RecordBatch): any; - abstract visitDictionaryBatch(node: DictionaryBatch): any; - abstract visitDictionaryEncoding(node: DictionaryEncoding): any; - abstract visitNullFieldType(node: Null): any; - abstract visitIntFieldType(node: Int): any; - abstract visitFloatingPointFieldType(node: FloatingPoint): any; - abstract visitBinaryFieldType(node: Binary): any; - abstract visitBoolFieldType(node: Bool): any; - abstract visitUtf8FieldType(node: Utf8): any; - abstract 
visitDecimalFieldType(node: Decimal): any; - abstract visitDateFieldType(node: Date): any; - abstract visitTimeFieldType(node: Time): any; - abstract visitTimestampFieldType(node: Timestamp): any; - abstract visitIntervalFieldType(node: Interval): any; - abstract visitListFieldType(node: List): any; - abstract visitStructFieldType(node: Struct): any; - abstract visitUnionFieldType(node: Union): any; - abstract visitFixedSizeBinaryFieldType(node: FixedSizeBinary): any; - abstract visitFixedSizeListFieldType(node: FixedSizeList): any; - abstract visitMapFieldType(node: Map_): any; -} - -export class Footer implements VisitorNode { - constructor(public dictionaryBatches: Block[], public recordBatches: Block[], public schema: Schema) {} - accept(visitor: Visitor): any { - return visitor.visitFooter(this); - } -} - -export class Block implements VisitorNode { - constructor(public metaDataLength: number, public bodyLength: Long, public offset: Long) {} - accept(visitor: Visitor): any { - return visitor.visitBlock(this); - } -} - -export class Message implements VisitorNode { - constructor(public version: MetadataVersion, public bodyLength: Long, public headerType: MessageHeader) {} - isSchema(): this is Schema { return this.headerType === MessageHeader.Schema; } - isRecordBatch(): this is RecordBatch { return this.headerType === MessageHeader.RecordBatch; } - isDictionaryBatch(): this is DictionaryBatch { return this.headerType === MessageHeader.DictionaryBatch; } - accept(visitor: Visitor): any { - visitor.visitMessage(this); - } -} - -export class Schema extends Message { - public dictionaries: Map; - constructor(version: MetadataVersion, public fields: Field[], public customMetadata?: Map, public endianness = Endianness.Little) { - super(version, Long.ZERO, MessageHeader.Schema); - const dictionaries = [] as Field[]; - for (let f: Field, i = -1, n = fields.length; ++i < n;) { - if ((f = fields[i])) { - f.dictionary && dictionaries.push(f); - 
dictionaries.push(...f.dictionaries); - } - } - this.dictionaries = new Map(dictionaries.map<[string, Field]>((f) => [ - f.dictionary!.dictionaryId.toFloat64().toString(), f - ])); - } - accept(visitor: Visitor): any { - return visitor.visitSchema(this); - } -} - -export class RecordBatch extends Message { - constructor(version: MetadataVersion, public length: Long, public fieldNodes: FieldNode[], public buffers: Buffer[]) { - super(version, new Long(buffers.reduce((s, b) => align(s + b.length.low + (b.offset.low - s), 8), 0), 0), MessageHeader.RecordBatch); - } - accept(visitor: Visitor) { - return visitor.visitRecordBatch(this); - } -} - -export class DictionaryBatch extends Message { - constructor(version: MetadataVersion, public dictionary: RecordBatch, public dictionaryId: Long, public isDelta: boolean) { - super(version, dictionary.bodyLength, MessageHeader.DictionaryBatch); - } - get fieldNodes(): FieldNode[] { return this.dictionary.fieldNodes; } - get buffers(): Buffer[] { return this.dictionary.buffers; } - accept(visitor: Visitor) { - return visitor.visitDictionaryBatch(this); - } - static atomicDictionaryId = 0; -} - -export class Field implements VisitorNode { - public dictionaries: Field[]; - constructor(public name: string, - public type: FieldType, - public typeType: Type, - public nullable = false, - public children: Field[] = [], - public metadata?: Map | null, - public dictionary?: DictionaryEncoding | null) { - const dictionaries = [] as Field[]; - for (let f: Field, i = -1, n = children.length; ++i < n;) { - if ((f = children[i])) { - f.dictionary && dictionaries.push(f); - dictionaries.push(...f.dictionaries); - } - } - this.dictionaries = dictionaries; - } - accept(visitor: Visitor): any { - return visitor.visitField(this); - } - indexField() { - return !this.dictionary ? 
this : new Field( - this.name, - this.dictionary.indexType, this.dictionary.indexType.type, - this.nullable, this.children, this.metadata, this.dictionary - ); - } - toString() { return `Field name[${this.name}], nullable[${this.nullable}], type[${this.type.toString()}]`; } -} - -export class Buffer implements VisitorNode { - constructor(public offset: Long, public length: Long) {} - accept(visitor: Visitor) { - return visitor.visitBuffer(this); - } -} - -export class FieldNode implements VisitorNode { - constructor(public length: Long, public nullCount: Long) {} - accept(visitor: Visitor) { - return visitor.visitFieldNode(this); - } -} - -export class DictionaryEncoding implements VisitorNode { - public isOrdered: boolean; - public dictionaryId: Long; - public indexType: Int; - constructor(indexType?: Int | null, dictionaryId?: Long | null, isOrdered?: boolean | null) { - this.isOrdered = isOrdered || false; - /* a dictionary index defaults to signed 32 bit int if unspecified */ - this.indexType = indexType || new Int(true, 32); - this.dictionaryId = dictionaryId || new Long(DictionaryBatch.atomicDictionaryId++, 0); - } - accept(visitor: Visitor): any { - return visitor.visitDictionaryEncoding(this); - } -} - -export abstract class FieldType implements VisitorNode { - constructor(public type: Type) {} - abstract accept(visitor: Visitor): any; - isNull(): this is Null { return this.type === Type.Null; } - isInt(): this is Int { return this.type === Type.Int; } - isFloatingPoint(): this is FloatingPoint { return this.type === Type.FloatingPoint; } - isBinary(): this is Binary { return this.type === Type.Binary; } - isUtf8(): this is Utf8 { return this.type === Type.Utf8; } - isBool(): this is Bool { return this.type === Type.Bool; } - isDecimal(): this is Decimal { return this.type === Type.Decimal; } - isDate(): this is Date { return this.type === Type.Date; } - isTime(): this is Time { return this.type === Type.Time; } - isTimestamp(): this is Timestamp { return 
this.type === Type.Timestamp; } - isInterval(): this is Interval { return this.type === Type.Interval; } - isList(): this is List { return this.type === Type.List; } - isStruct(): this is Struct { return this.type === Type.Struct_; } - isUnion(): this is Union { return this.type === Type.Union; } - isFixedSizeBinary(): this is FixedSizeBinary { return this.type === Type.FixedSizeBinary; } - isFixedSizeList(): this is FixedSizeList { return this.type === Type.FixedSizeList; } - isMap(): this is Map_ { return this.type === Type.Map; } -} - -export class Null extends FieldType { - toString() { return `Null`; } - constructor() { - super(Type.Null); - } - accept(visitor: Visitor) { - return visitor.visitNullFieldType(this); - } -} - -export class Int extends FieldType { - toString() { return `Int isSigned[${this.isSigned}], bitWidth[${this.bitWidth}]`; } - constructor(public isSigned: boolean, public bitWidth: IntBitWidth) { - super(Type.Int); - } - accept(visitor: Visitor) { - return visitor.visitIntFieldType(this); - } -} - -export class FloatingPoint extends FieldType { - toString() { return `FloatingPoint precision`; } - constructor(public precision: Precision) { - super(Type.FloatingPoint); - } - accept(visitor: Visitor) { - return visitor.visitFloatingPointFieldType(this); - } -} - -export class Binary extends FieldType { - toString() { return `Binary`; } - constructor() { - super(Type.Binary); - } - accept(visitor: Visitor) { - return visitor.visitBinaryFieldType(this); - } -} - -export class Utf8 extends FieldType { - toString() { return `Utf8`; } - constructor() { - super(Type.Utf8); - } - accept(visitor: Visitor) { - return visitor.visitUtf8FieldType(this); - } -} - -export class Bool extends FieldType { - toString() { return `Bool`; } - constructor() { - super(Type.Bool); - } - accept(visitor: Visitor) { - return visitor.visitBoolFieldType(this); - } -} - -export class Decimal extends FieldType { - toString() { return `Decimal scale[${this.scale}], 
precision[${this.precision}]`; } - constructor(public scale: number, public precision: number) { - super(Type.Decimal); - } - accept(visitor: Visitor) { - return visitor.visitDecimalFieldType(this); - } -} - -export class Date extends FieldType { - toString() { return `Date unit[${this.unit}]`; } - constructor(public unit: DateUnit) { - super(Type.Date); - } - accept(visitor: Visitor) { - return visitor.visitDateFieldType(this); - } -} - -export class Time extends FieldType { - toString() { return `Time unit[${this.unit}], bitWidth[${this.bitWidth}]`; } - constructor(public unit: TimeUnit, public bitWidth: TimeBitWidth) { - super(Type.Time); - } - accept(visitor: Visitor) { - return visitor.visitTimeFieldType(this); - } -} - -export class Timestamp extends FieldType { - toString() { return `Timestamp unit[${this.unit}], timezone[${this.timezone}]`; } - constructor(public unit: TimeUnit, public timezone?: string | null) { - super(Type.Timestamp); - } - accept(visitor: Visitor) { - return visitor.visitTimestampFieldType(this); - } -} - -export class Interval extends FieldType { - toString() { return `Interval unit[${this.unit}]`; } - constructor(public unit: IntervalUnit) { - super(Type.Interval); - } - accept(visitor: Visitor) { - return visitor.visitIntervalFieldType(this); - } -} - -export class List extends FieldType { - toString() { return `List`; } - constructor() { - super(Type.List); - } - accept(visitor: Visitor) { - return visitor.visitListFieldType(this); - } -} - -export class Struct extends FieldType { - toString() { return `Struct`; } - constructor() { - super(Type.Struct_); - } - accept(visitor: Visitor) { - return visitor.visitStructFieldType(this); - } -} - -export class Union extends FieldType { - toString() { return `Union mode[${this.mode}], typeIds[${this.typeIds}]`; } - constructor(public mode: UnionMode, public typeIds: Type[]) { - super(Type.Union); - } - accept(visitor: Visitor) { - return visitor.visitUnionFieldType(this); - } -} - -export 
class FixedSizeBinary extends FieldType { - toString() { return `FixedSizeBinary byteWidth[${this.byteWidth}]`; } - constructor(public byteWidth: number) { - super(Type.FixedSizeBinary); - } - accept(visitor: Visitor) { - return visitor.visitFixedSizeBinaryFieldType(this); - } -} - -export class FixedSizeList extends FieldType { - toString() { return `FixedSizeList listSize[${this.listSize}]`; } - constructor(public listSize: number) { - super(Type.FixedSizeList); - } - accept(visitor: Visitor) { - return visitor.visitFixedSizeListFieldType(this); - } -} - -export class Map_ extends FieldType { - toString() { return `Map keysSorted[${this.keysSorted}]`; } - constructor(public keysSorted: boolean) { - super(Type.Map); - } - accept(visitor: Visitor) { - return visitor.visitMapFieldType(this); - } -} diff --git a/js/src/ipc/message.ts b/js/src/ipc/message.ts new file mode 100644 index 0000000000000..57712f84a6d6c --- /dev/null +++ b/js/src/ipc/message.ts @@ -0,0 +1,156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import { align } from '../util/bit'; +import * as Schema_ from '../fb/Schema'; +import * as Message_ from '../fb/Message'; +import { flatbuffers } from 'flatbuffers'; +import { DataType, Int, Dictionary } from '../type'; +import { MessageVisitor, VisitorNode } from '../visitor'; + +export import Long = flatbuffers.Long; +export import Endianness = Schema_.org.apache.arrow.flatbuf.Endianness; +export import MessageHeader = Message_.org.apache.arrow.flatbuf.MessageHeader; +export import MetadataVersion = Schema_.org.apache.arrow.flatbuf.MetadataVersion; + +export class Footer implements Partial { + constructor(public dictionaryBatches: Block[], public recordBatches: Block[], public schema: Schema) {} + acceptMessageVisitor(visitor: MessageVisitor): any { + return visitor.visitFooter(this); + } +} + +export class Block implements Partial { + public readonly offset: number; + public readonly offsetLong: Long; + public readonly bodyLength: number; + public readonly bodyLengthLong: Long; + constructor(offset: Long | number, public metaDataLength: number, bodyLength: Long | number) { + this.offset = (this.offsetLong = typeof offset === 'number' ? new Long(offset, 0) : offset).low; + this.bodyLength = (this.bodyLengthLong = typeof bodyLength === 'number' ? new Long(bodyLength, 0) : bodyLength).low; + } + acceptMessageVisitor(visitor: MessageVisitor): any { + return visitor.visitBlock(this); + } +} + +export class Message implements Partial { + public readonly bodyLength: number; + public readonly bodyLengthLong: Long; + constructor(public version: MetadataVersion, public headerType: MessageHeader, bodyLength: Long | number) { + this.bodyLength = (this.bodyLengthLong = typeof bodyLength === 'number' ? 
new Long(bodyLength, 0) : bodyLength).low; + } + acceptMessageVisitor(visitor: MessageVisitor): any { + return visitor.visitMessage(this); + } + static isSchema(x: Message): x is Schema { return x.headerType === MessageHeader.Schema; } + static isRecordBatch(x: Message): x is RecordBatch { return x.headerType === MessageHeader.RecordBatch; } + static isDictionaryBatch(x: Message): x is DictionaryBatch { return x.headerType === MessageHeader.DictionaryBatch; } +} + +export class Schema extends Message { + public dictionaries: Map; + constructor(version: MetadataVersion, public fields: Field[], public customMetadata?: Map, public endianness = Endianness.Little) { + super(version, MessageHeader.Schema, Long.ZERO); + this.dictionaries = fields.reduce(function flattenDictionaryFields(dictionaries, f): Map { + if (f.dictionary) { + const id = f.dictionary.id.toString(); + if (!dictionaries.has(id)) { /* register only the first field seen for each dictionary id */ + dictionaries.set(id, f); + } + } + return (f.type.children || []).reduce(flattenDictionaryFields, dictionaries); + }, new Map()); + } +} + +export class RecordBatch extends Message { + public readonly length: number; + public readonly lengthLong: Long; + constructor(version: MetadataVersion, length: Long | number, public fieldNodes: FieldNode[], public buffers: Buffer[]) { + super(version, MessageHeader.RecordBatch, buffers.reduce((s, b) => align(s + b.length + (b.offset - s), 8), 0)); + this.length = (this.lengthLong = typeof length === 'number' ? 
new Long(length, 0) : length).low; + } + acceptMessageVisitor(visitor: MessageVisitor): any { + return visitor.visitRecordBatch(this); + } +} + +export class DictionaryBatch extends Message { + public readonly dictionaryId: number; + public readonly dictionaryIdLong: Long; + constructor(version: MetadataVersion, public dictionary: RecordBatch, dictionaryId: Long | number, public isDelta: boolean) { + super(version, MessageHeader.DictionaryBatch, dictionary.bodyLength); + this.dictionaryId = (this.dictionaryIdLong = typeof dictionaryId === 'number' ? new Long(dictionaryId, 0) : dictionaryId).low; + } + public get fieldNodes(): FieldNode[] { return this.dictionary.fieldNodes; } + public get buffers(): Buffer[] { return this.dictionary.buffers; } + public acceptMessageVisitor(visitor: MessageVisitor): any { + return visitor.visitDictionaryBatch(this); + } + static atomicDictionaryId = 0; +} + +export class Field implements Partial { + constructor(public name: string, + public type: T, + public nullable = false, + public metadata?: Map | null, + public dictionary?: Dictionary | null) { + } + get typeId(): T['TType'] { return this.type.TType; } + get [Symbol.toStringTag](): string { return 'Field'; } + get keys(): Field | Field> { + return !this.dictionary ? this : new Field>( + this.name, this.dictionary.indicies.type, + this.nullable, this.metadata, this.dictionary + ); + } + acceptMessageVisitor(visitor: MessageVisitor): any { + return visitor.visitField(this); + } + toString() { + return this[Symbol.toStringTag] + + ` name[${this.name}]` + + `, nullable[${this.nullable}]` + + `, type[${this.type.toString()}]`; + } +} + +export class Buffer implements Partial { + public readonly offset: number; + public readonly length: number; + constructor(offset: Long | number, length: Long | number) { + this.offset = typeof offset === 'number' ? offset : offset.low; + this.length = typeof length === 'number' ? 
length : length.low; + } + acceptMessageVisitor(visitor: MessageVisitor): any { + return visitor.visitBuffer(this); + } +} + +export class FieldNode implements Partial { + public readonly length: number; + public readonly nullCount: number; + constructor(length: Long | number, nullCount: Long | number) { + this.length = typeof length === 'number' ? length : length.low; + this.nullCount = typeof nullCount === 'number' ? nullCount : nullCount.low; + } + acceptMessageVisitor(visitor: MessageVisitor): any { + return visitor.visitFieldNode(this); + } +} diff --git a/js/src/reader/arrow.ts b/js/src/reader/arrow.ts deleted file mode 100644 index cf8a3d6a281a2..0000000000000 --- a/js/src/reader/arrow.ts +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { readJSON } from './json'; -import { readBuffers, readBuffersAsync } from './buffer'; -import { readVectors, readVectorsAsync } from './vector'; -import { Vector } from '../vector/vector'; - -export { readJSON }; -export { readBuffers, readBuffersAsync }; -export { readVectors, readVectorsAsync }; - -export function* read(sources: Iterable | object | string) { - let input: any = sources; - let batches: Iterable; - if (typeof input === 'string') { - try { input = JSON.parse(input); } - catch (e) { input = sources; } - } - if (!input || typeof input !== 'object') { - batches = (typeof input === 'string') ? readVectors(readBuffers([input])) : []; - } else { - batches = (typeof input[Symbol.iterator] === 'function') - ? readVectors(readBuffers(input)) - : readVectors(readJSON(input)); - } - yield* batches; -} - -export async function* readAsync(sources: AsyncIterable) { - for await (let vectors of readVectorsAsync(readBuffersAsync(sources))) { - yield vectors; - } -} diff --git a/js/src/reader/buffer.ts b/js/src/reader/buffer.ts deleted file mode 100644 index c7b90507e396f..0000000000000 --- a/js/src/reader/buffer.ts +++ /dev/null @@ -1,229 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import { flatbuffers } from 'flatbuffers'; -import { VectorLayoutReader } from './vector'; -import { TypedArray, TypedArrayConstructor } from '../vector/types'; -import { footerFromByteBuffer, messageFromByteBuffer } from '../format/fb'; -import { Footer, Schema, RecordBatch, DictionaryBatch, Field, Buffer, FieldNode } from '../format/arrow'; -import ByteBuffer = flatbuffers.ByteBuffer; - -export function* readBuffers(sources: Iterable) { - let schema: Schema | null = null; - let readMessages: ((bb: ByteBuffer) => IterableIterator) | null = null; - for (const source of sources) { - const bb = toByteBuffer(source); - if ((!schema && ({ schema, readMessages } = readSchema(bb))) && schema && readMessages) { - for (const message of readMessages(bb)) { - yield { - schema, message, reader: new BufferVectorLayoutReader( - bb, - (function* (fieldNodes) { yield* fieldNodes; })(message.fieldNodes), - (function* (buffers) { yield* buffers; })(message.buffers) - ) as VectorLayoutReader - }; - } - } - } -} - -export async function* readBuffersAsync(sources: AsyncIterable) { - let schema: Schema | null = null; - let readMessages: ((bb: ByteBuffer) => IterableIterator) | null = null; - for await (const source of sources) { - const bb = toByteBuffer(source); - if ((!schema && ({ schema, readMessages } = readSchema(bb))) && schema && readMessages) { - for (const message of readMessages(bb)) { - yield { - schema, message, reader: new BufferVectorLayoutReader( - bb, - (function* (fieldNodes) { yield* fieldNodes; })(message.fieldNodes), - (function* (buffers) { yield* buffers; })(message.buffers) - ) as VectorLayoutReader - }; - } - } - } -} - -function toByteBuffer(bytes?: Uint8Array | NodeBuffer | string) { - let arr: Uint8Array = bytes as any || new Uint8Array(0); - if (typeof bytes === 'string') { - arr = new Uint8Array(bytes.length); - for (let i = -1, n = bytes.length; ++i < n;) { - arr[i] = bytes.charCodeAt(i); - } - return new ByteBuffer(arr); - } - return new 
ByteBuffer(arr); -} - -function readSchema(bb: ByteBuffer) { - let schema: Schema, readMessages, footer: Footer | null; - if (footer = readFileSchema(bb)) { - schema = footer.schema!; - readMessages = readFileMessages(footer); - } else if (schema = readStreamSchema(bb)!) { - readMessages = readStreamMessages; - } else { - throw new Error('Invalid Arrow buffer'); - } - return { schema, readMessages }; -} - -const PADDING = 4; -const MAGIC_STR = 'ARROW1'; -const MAGIC = new Uint8Array(MAGIC_STR.length); -for (let i = 0; i < MAGIC_STR.length; i += 1 | 0) { - MAGIC[i] = MAGIC_STR.charCodeAt(i); -} - -function checkForMagicArrowString(buffer: Uint8Array, index = 0) { - for (let i = -1, n = MAGIC.length; ++i < n;) { - if (MAGIC[i] !== buffer[index + i]) { - return false; - } - } - return true; -} - -const magicLength = MAGIC.length; -const magicAndPadding = magicLength + PADDING; -const magicX2AndPadding = magicLength * 2 + PADDING; - -function readStreamSchema(bb: ByteBuffer) { - if (!checkForMagicArrowString(bb.bytes(), 0)) { - for (const message of readMessages(bb)) { - if (message.isSchema()) { - return message as Schema; - } - } - } - return null; -} - -function* readStreamMessages(bb: ByteBuffer) { - for (const message of readMessages(bb)) { - if (message.isRecordBatch()) { - yield message; - } else if (message.isDictionaryBatch()) { - yield message; - } else { - continue; - } - // position the buffer after the body to read the next message - bb.setPosition(bb.position() + message.bodyLength.low); - } -} - -function readFileSchema(bb: ByteBuffer) { - let fileLength = bb.capacity(), footerLength: number, footerOffset: number; - if ((fileLength < magicX2AndPadding /* Arrow buffer too small */) || - (!checkForMagicArrowString(bb.bytes(), 0) /* Missing magic start */) || - (!checkForMagicArrowString(bb.bytes(), fileLength - magicLength) /* Missing magic end */) || - (/* Invalid footer length */ - (footerLength = bb.readInt32(footerOffset = fileLength - 
magicAndPadding)) < 1 && - (footerLength + magicX2AndPadding > fileLength))) { - return null; - } - bb.setPosition(footerOffset - footerLength); - return footerFromByteBuffer(bb); -} - -function readFileMessages(footer: Footer) { - return function* (bb: ByteBuffer) { - for (let i = -1, batches = footer.dictionaryBatches, n = batches.length; ++i < n;) { - bb.setPosition(batches[i].offset.low); - yield readMessage(bb, bb.readInt32(bb.position())) as DictionaryBatch; - } - for (let i = -1, batches = footer.recordBatches, n = batches.length; ++i < n;) { - bb.setPosition(batches[i].offset.low); - yield readMessage(bb, bb.readInt32(bb.position())) as RecordBatch; - } - }; -} - -function* readMessages(bb: ByteBuffer) { - let length: number, message: Schema | RecordBatch | DictionaryBatch; - while (bb.position() < bb.capacity() && - (length = bb.readInt32(bb.position())) > 0) { - if (message = readMessage(bb, length)!) { - yield message; - } - } -} - -function readMessage(bb: ByteBuffer, length: number) { - bb.setPosition(bb.position() + PADDING); - const message = messageFromByteBuffer(bb); - bb.setPosition(bb.position() + length); - return message; -} - -class BufferVectorLayoutReader implements VectorLayoutReader { - private offset: number; - private bytes: Uint8Array; - constructor(bb: ByteBuffer, private fieldNodes: Iterator, private buffers: Iterator) { - this.bytes = bb.bytes(); - this.offset = bb.position(); - } - readContainerLayout(field: Field) { - const { bytes, offset, buffers } = this, fieldNode = this.fieldNodes.next().value; - return { - field, fieldNode, - validity: createValidityArray(bytes, field, fieldNode, offset, buffers.next().value) - }; - } - readFixedWidthLayout(field: Field, dataType: TypedArrayConstructor) { - const { bytes, offset, buffers } = this, fieldNode = this.fieldNodes.next().value; - return { - field, fieldNode, - validity: createValidityArray(bytes, field, fieldNode, offset, buffers.next().value), - data: createTypedArray(bytes, 
field, fieldNode, offset, buffers.next().value, dataType) - }; - } - readBinaryLayout(field: Field) { - const { bytes, offset, buffers } = this, fieldNode = this.fieldNodes.next().value; - return { - field, fieldNode, - validity: createValidityArray(bytes, field, fieldNode, offset, buffers.next().value), - offsets: createTypedArray(bytes, field, fieldNode, offset, buffers.next().value, Int32Array), - data: createTypedArray(bytes, field, fieldNode, offset, buffers.next().value, Uint8Array) - }; - } - readVariableWidthLayout(field: Field) { - const { bytes, offset, buffers } = this, fieldNode = this.fieldNodes.next().value; - return { - field, fieldNode, - validity: createValidityArray(bytes, field, fieldNode, offset, buffers.next().value), - offsets: createTypedArray(bytes, field, fieldNode, offset, buffers.next().value, Int32Array) - }; - } -} - -function createValidityArray(bytes: Uint8Array, field: Field, fieldNode: FieldNode, offset: number, buffer: Buffer) { - return field.nullable && fieldNode.nullCount.low > 0 && - createTypedArray(bytes, field, fieldNode, offset, buffer, Uint8Array) || null; -} - -function createTypedArray(bytes: Uint8Array, _field: Field, _fieldNode: FieldNode, offset: number, buffer: Buffer, ArrayConstructor: TypedArrayConstructor): T { - return new ArrayConstructor( - bytes.buffer, - bytes.byteOffset + offset + buffer.offset.low, - buffer.length.low / ArrayConstructor.BYTES_PER_ELEMENT - ); -} diff --git a/js/src/reader/json.ts b/js/src/reader/json.ts deleted file mode 100644 index 49431496354e8..0000000000000 --- a/js/src/reader/json.ts +++ /dev/null @@ -1,176 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -import * as Schema_ from '../format/fb/Schema'; -import { Int64, Int128 } from '../util/int'; -import { VectorLayoutReader } from './vector'; -import { TextEncoder } from 'text-encoding-utf-8'; -import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit; -import { TypedArray, TypedArrayConstructor } from '../vector/types'; -import { schemaFromJSON, recordBatchFromJSON, dictionaryBatchFromJSON } from '../format/json'; -import { Schema, RecordBatch, DictionaryBatch, Field, Buffer, FieldNode } from '../format/arrow'; -export { Schema, RecordBatch, DictionaryBatch }; - -export function* readJSON(json: any) { - const schema = schemaFromJSON(json['schema']); - for (const batch of (json['dictionaries'] || [])) { - const message = dictionaryBatchFromJSON(batch); - yield { - schema, message, reader: new JSONVectorLayoutReader( - flattenDataSources(batch['data']['columns']), - (function* (fieldNodes) { yield* fieldNodes; })(message.fieldNodes), - (function* (buffers) { yield* buffers; })(message.buffers) - ) as VectorLayoutReader - }; - } - for (const batch of (json['batches'] || [])) { - const message = recordBatchFromJSON(batch); - yield { - schema, message, reader: new JSONVectorLayoutReader( - flattenDataSources(batch['columns']), - (function* (fieldNodes) { yield* fieldNodes; })(message.fieldNodes), - (function* (buffers) { yield* buffers; })(message.buffers) - ) as VectorLayoutReader - 
}; - } -} - -function flattenDataSources(xs: any[]): any[][] { - return (xs || []).reduce((buffers, column: any) => [ - ...buffers, - ...(column['VALIDITY'] && [column['VALIDITY']] || []), - ...(column['OFFSET'] && [column['OFFSET']] || []), - ...(column['DATA'] && [column['DATA']] || []), - ...flattenDataSources(column['children']) - ], [] as any[][]); -} - -class JSONVectorLayoutReader implements VectorLayoutReader { - constructor(private sources: any[][], private fieldNodes: Iterator, private buffers: Iterator) {} - readContainerLayout(field: Field) { - const { sources, buffers } = this, fieldNode = this.fieldNodes.next().value; - return { - field, fieldNode, - validity: createValidityArray(sources, field, fieldNode, buffers.next().value) - }; - } - readFixedWidthLayout(field: Field, dataType: TypedArrayConstructor) { - const { sources, buffers } = this, fieldNode = this.fieldNodes.next().value; - return { - field, fieldNode, - validity: createValidityArray(sources, field, fieldNode, buffers.next().value), - data: createDataArray(sources, field, fieldNode, buffers.next().value, dataType) - }; - } - readBinaryLayout(field: Field) { - const { sources, buffers } = this, fieldNode = this.fieldNodes.next().value; - return { - field, fieldNode, - validity: createValidityArray(sources, field, fieldNode, buffers.next().value), - offsets: new Int32Array(sources[buffers.next().value.offset.low]), - data: createDataArray(sources, field, fieldNode, buffers.next().value, Uint8Array) - }; - } - readVariableWidthLayout(field: Field) { - const { sources, buffers } = this, fieldNode = this.fieldNodes.next().value; - return { - field, fieldNode, - validity: createValidityArray(sources, field, fieldNode, buffers.next().value), - offsets: new Int32Array(sources[buffers.next().value.offset.low]), - }; - } -} - -function createValidityArray(sources: any[][], field: Field, fieldNode: FieldNode, buffer: Buffer) { - return field.nullable && fieldNode.nullCount.low > 0 && - 
booleanFromJSON(sources[buffer.offset.low]) || null; -} - -const encoder = new TextEncoder('utf-8'); - -function createDataArray(sources: any[][], field: Field, _fieldNode: FieldNode, buffer: Buffer, ArrayConstructor: TypedArrayConstructor): T { - let type = field.type, data: ArrayLike | ArrayBufferLike; - if (type.isTimestamp() === true) { - data = int64sFromJSON(sources[buffer.offset.low] as string[]); - } else if ((type.isInt() || type.isTime()) && type.bitWidth === 64) { - data = int64sFromJSON(sources[buffer.offset.low] as string[]); - } else if (type.isDate() && type.unit === DateUnit.MILLISECOND) { - data = int64sFromJSON(sources[buffer.offset.low] as string[]); - } else if (type.isDecimal() === true) { - data = decimalFromJSON(sources[buffer.offset.low] as string[]); - } else if (type.isBinary() === true) { - data = binaryFromJSON(sources[buffer.offset.low] as string[]); - } else if (type.isBool() === true) { - data = booleanFromJSON(sources[buffer.offset.low] as number[]).buffer; - } else if (type.isUtf8() === true) { - data = encoder.encode((sources[buffer.offset.low] as string[]).join('')); - } else { - data = (sources[buffer.offset.low]).map((x) => +x); - } - return new ArrayConstructor(data); -} - -function int64sFromJSON(values: string[]) { - const data = new Uint32Array(values.length * 2); - for (let i = -1, n = values.length; ++i < n;) { - // Force all values (even numbers) to be parsed as strings since - // pulling out high and low bits seems to lose precision sometimes - // For example: - // > -4613034156400212000 >>> 0 - // 721782784 - // The correct lower 32-bits are 721782752 - Int64.fromString(values[i].toString(), new Uint32Array(data.buffer, data.byteOffset + 2 * i * 4, 2)); - } - return data.buffer; -} - -function decimalFromJSON(values: string[]) { - const data = new Uint32Array(values.length * 4); - for (let i = -1, n = values.length; ++i < n;) { - Int128.fromString(values[i], new Uint32Array(data.buffer, data.byteOffset + 4 * 4 * i, 4)); 
- } - return data.buffer; -} - -function binaryFromJSON(values: string[]) { - // "DATA": ["49BC7D5B6C47D2","3F5FB6D9322026"] - // There are definitely more efficient ways to do this... but it gets the - // job done. - const joined = values.join(''); - const data = new Uint8Array(joined.length / 2); - for (let i = 0; i < joined.length; i += 2) { - data[i >> 1] = parseInt(joined.substr(i, 2), 16); - } - return data.buffer; -} - -function booleanFromJSON(arr: number[]) { - let xs = [], n, i = 0; - let bit = 0, byte = 0; - for (const value of arr) { - value && (byte |= 1 << bit); - if (++bit === 8) { - xs[i++] = byte; - byte = bit = 0; - } - } - if (i === 0 || bit > 0) { xs[i++] = byte; } - if (i % 8 && (n = i + 8 - i % 8)) { - do { xs[i] = 0; } while (++i < n); - } - return new Uint8Array(xs); -} diff --git a/js/src/reader/vector.ts b/js/src/reader/vector.ts deleted file mode 100644 index 3bd6d2bb67650..0000000000000 --- a/js/src/reader/vector.ts +++ /dev/null @@ -1,255 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import * as Schema_ from '../format/fb/Schema'; -import { TypedArray, TypedArrayConstructor } from '../vector/types'; -import { Schema, RecordBatch, DictionaryBatch, Field, FieldNode } from '../format/arrow'; -import { Int, Date, Time, Timestamp, Decimal, FixedSizeList, FixedSizeBinary, FloatingPoint } from '../format/arrow'; -import { - Vector, BoolVector, BinaryVector, DictionaryVector, - Int8Vector, Int16Vector, Int32Vector, Int64Vector, - Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector, - Utf8Vector, ListVector, FixedSizeListVector, StructVector, - Float16Vector, Float32Vector, Float64Vector, DecimalVector, - Date32Vector, Date64Vector, Time32Vector, Time64Vector, TimestampVector, -} from '../vector/arrow'; - -import Type = Schema_.org.apache.arrow.flatbuf.Type; -import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit; -import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit; -import Precision = Schema_.org.apache.arrow.flatbuf.Precision; -// import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit; - -export interface ContainerLayout { - fieldNode: FieldNode; - validity: Uint8Array | null | void; -} - -export interface VariableWidthLayout { - fieldNode: FieldNode; - offsets: Int32Array; - validity: Uint8Array | null | void; -} - -export interface BinaryLayout extends FixedWidthLayout { - offsets: Int32Array; -} - -export interface FixedWidthLayout { - fieldNode: FieldNode; - data: T; - validity: Uint8Array | null | void; -} - -export function* readVectors(messages: Iterable<{ schema: Schema, message: RecordBatch | DictionaryBatch, reader: VectorLayoutReader }>) { - const dictionaries = new Map(); - for (const { schema, message, reader } of messages) { - yield* readMessageVectors(schema, message, new VectorReader(dictionaries, reader)); - } -} - -export async function* readVectorsAsync(messages: AsyncIterable<{ schema: Schema, message: RecordBatch | DictionaryBatch, reader: VectorLayoutReader }>) { - const dictionaries = new Map(); - 
for await (const { schema, message, reader } of messages) { - yield* readMessageVectors(schema, message, new VectorReader(dictionaries, reader)); - } -} - -function* readMessageVectors(schema: Schema, message: RecordBatch | DictionaryBatch, reader: VectorReader) { - if (message.isRecordBatch() === true) { - yield schema.fields.map((field) => reader.readVector(field)); - } else if (message.isDictionaryBatch()) { - let id = message.dictionaryId.toFloat64().toString(); - let vector = reader.readValueVector(schema.dictionaries.get(id)!); - if (message.isDelta) { - vector = reader.dictionaries.get(id)!.concat(vector); - } - reader.dictionaries.set(id, vector); - } -} - -export interface VectorLayoutReader { - readBinaryLayout(field: Field): BinaryLayout; - readContainerLayout(field: Field): ContainerLayout; - readVariableWidthLayout(field: Field): VariableWidthLayout; - readFixedWidthLayout(field: Field, TypedArrayConstructor: TypedArrayConstructor): FixedWidthLayout; -} - -export class VectorReader implements VectorLayoutReader { - constructor(public dictionaries: Map, protected layout: VectorLayoutReader) {} - readVector(field: Field): Vector { - return this.readDictionaryVector(field) || this.readValueVector(field); - } - readDictionaryVector(field: Field) { - const encoding = field.dictionary; - if (encoding) { - const keys = this.readIntVector(field.indexField()); - const data = this.dictionaries.get(encoding.dictionaryId.toFloat64().toString())!; - return new DictionaryVector({ - field, data, keys, - validity: (keys as any).validity, - fieldNode: (keys as any).fieldNode, - }); - } - return null; - } - readValueVector(field: Field) { - switch (field.typeType) { - case Type.NONE: return this.readNullVector(); - case Type.Null: return this.readNullVector(); - // case Type.Map: return this.readMapVector(field); - case Type.Int: return this.readIntVector(field); - case Type.Bool: return this.readBoolVector(field); - case Type.Date: return this.readDateVector(field); - 
case Type.List: return this.readListVector(field); - case Type.Utf8: return this.readUtf8Vector(field); - case Type.Time: return this.readTimeVector(field); - // case Type.Union: return this.readUnionVector(field); - case Type.Binary: return this.readBinaryVector(field); - case Type.Decimal: return this.readDecimalVector(field); - case Type.Struct_: return this.readStructVector(field); - case Type.FloatingPoint: return this.readFloatVector(field); - case Type.Timestamp: return this.readTimestampVector(field); - case Type.FixedSizeList: return this.readFixedSizeListVector(field); - case Type.FixedSizeBinary: return this.readFixedSizeBinaryVector(field); - } - throw new Error(`Unrecognized ${field.toString()}`); - } - readNullVector() { - return new Vector(); - } - readBoolVector(field: Field) { - return new BoolVector(this.readFixedWidthLayout(field, Uint8Array)); - } - readDateVector(field: Field) { - const type = field.type as Date; - switch (type.unit) { - case DateUnit.DAY: return new Date32Vector({ ...this.readFixedWidthLayout(field, Int32Array), unit: DateUnit[type.unit] }); - case DateUnit.MILLISECOND: return new Date64Vector({ ...this.readFixedWidthLayout(field, Int32Array), unit: DateUnit[type.unit] }); - } - throw new Error(`Unrecognized ${type.toString()}`); - } - readTimeVector(field: Field) { - const type = field.type as Time; - switch (type.bitWidth) { - case 32: return new Time32Vector({ ...this.readFixedWidthLayout(field, Int32Array), unit: TimeUnit[type.unit] }); - case 64: return new Time64Vector({ ...this.readFixedWidthLayout(field, Uint32Array), unit: TimeUnit[type.unit] }); - } - throw new Error(`Unrecognized ${type.toString()}`); - } - readTimestampVector(field: Field) { - const type = field.type as Timestamp; - const { fieldNode, validity, data } = this.readFixedWidthLayout(field, Uint32Array); - return new TimestampVector({ - field, fieldNode, validity, data, - timezone: type.timezone!, - unit: TimeUnit[type.unit], - }); - } - 
readListVector(field: Field) { - const { fieldNode, validity, offsets } = this.readVariableWidthLayout(field); - return new ListVector({ - field, fieldNode, validity, offsets, - values: this.readVector(field.children[0]) - }); - } - readStructVector(field: Field) { - const { fieldNode, validity } = this.readContainerLayout(field); - return new StructVector({ - field, fieldNode, validity, - columns: field.children.map((field) => this.readVector(field)) - }); - } - readBinaryVector(field: Field) { - return new BinaryVector(this.readBinaryLayout(field)); - } - readDecimalVector(field: Field) { - const type = field.type as Decimal; - const { fieldNode, validity, data } = this.readFixedWidthLayout(field, Uint32Array); - return new DecimalVector({ - scale: type.scale, - precision: type.precision, - field, fieldNode, validity, data - }); - } - readUtf8Vector(field: Field) { - const { fieldNode, validity, offsets, data } = this.readBinaryLayout(field); - return new Utf8Vector({ - field, fieldNode, - values: new BinaryVector({ - validity, offsets, data - }) - }); - } - readFixedSizeListVector(field: Field) { - const type = field.type as FixedSizeList; - const { fieldNode, validity } = this.readContainerLayout(field); - return new FixedSizeListVector({ - field, fieldNode, validity, - size: type.listSize, - values: this.readVector(field.children[0]) - }); - } - readFixedSizeBinaryVector(field: Field) { - const type = field.type as FixedSizeBinary; - const { fieldNode, validity, data } = this.readFixedWidthLayout(field, Uint8Array); - return new FixedSizeListVector({ - size: type.byteWidth, - field, fieldNode, validity, - values: new Uint8Vector({ data }) - }); - } - readFloatVector(field: Field) { - const type = field.type as FloatingPoint; - switch (type.precision) { - case Precision.HALF: return new Float16Vector(this.readFixedWidthLayout(field, Uint16Array)); - case Precision.SINGLE: return new Float32Vector(this.readFixedWidthLayout(field, Float32Array)); - case 
Precision.DOUBLE: return new Float64Vector(this.readFixedWidthLayout(field, Float64Array)); - } - throw new Error(`Unrecognized FloatingPoint { precision: ${type.precision} }`); - } - readIntVector(field: Field) { - const type = field.type as Int; - if (type.isSigned) { - switch (type.bitWidth) { - case 8: return new Int8Vector(this.readFixedWidthLayout(field, Int8Array)); - case 16: return new Int16Vector(this.readFixedWidthLayout(field, Int16Array)); - case 32: return new Int32Vector(this.readFixedWidthLayout(field, Int32Array)); - case 64: return new Int64Vector(this.readFixedWidthLayout(field, Int32Array)); - } - } - switch (type.bitWidth) { - case 8: return new Uint8Vector(this.readFixedWidthLayout(field, Uint8Array)); - case 16: return new Uint16Vector(this.readFixedWidthLayout(field, Uint16Array)); - case 32: return new Uint32Vector(this.readFixedWidthLayout(field, Uint32Array)); - case 64: return new Uint64Vector(this.readFixedWidthLayout(field, Uint32Array)); - } - throw new Error(`Unrecognized Int { isSigned: ${type.isSigned}, bitWidth: ${type.bitWidth} }`); - } - readContainerLayout(field: Field) { - return this.layout.readContainerLayout(field); - } - readBinaryLayout(field: Field) { - return this.layout.readBinaryLayout(field); - } - readVariableWidthLayout(field: Field) { - return this.layout.readVariableWidthLayout(field); - } - readFixedWidthLayout(field: Field, TypedArrayConstructor: TypedArrayConstructor) { - return this.layout.readFixedWidthLayout(field, TypedArrayConstructor); - } -} diff --git a/js/src/type.ts b/js/src/type.ts new file mode 100644 index 0000000000000..0c39a449e0cb6 --- /dev/null +++ b/js/src/type.ts @@ -0,0 +1,501 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { Data } from './data'; +import * as Schema_ from './fb/Schema'; +import { Vector, View } from './vector'; +import { flatbuffers } from 'flatbuffers'; +import { TypeVisitor, VisitorNode } from './visitor'; +import { Field, DictionaryBatch } from './ipc/message'; + +export import Long = flatbuffers.Long; +export import ArrowType = Schema_.org.apache.arrow.flatbuf.Type; +export import DateUnit = Schema_.org.apache.arrow.flatbuf.DateUnit; +export import TimeUnit = Schema_.org.apache.arrow.flatbuf.TimeUnit; +export import Precision = Schema_.org.apache.arrow.flatbuf.Precision; +export import UnionMode = Schema_.org.apache.arrow.flatbuf.UnionMode; +export import VectorType = Schema_.org.apache.arrow.flatbuf.VectorType; +export import IntervalUnit = Schema_.org.apache.arrow.flatbuf.IntervalUnit; + +export type TimeBitWidth = 32 | 64; +export type IntBitWidth = 1 | 8 | 16 | 32 | 64; + +export type NumericType = Int | Float | Date_ | Time | Interval | Timestamp; +export type FixedSizeType = Int64 | Uint64 | Decimal | FixedSizeBinary; +export type PrimitiveType = NumericType | FixedSizeType; + +export type FlatListType = Utf8 | Binary; // <-- these types have `offset`, `data`, and `validity` buffers +export type FlatType = Bool | PrimitiveType | FlatListType; // <-- these types have `data` and `validity` buffers +export type ListType = List | FixedSizeList | FlatListType; // <-- these 
types have `offset` and `validity` buffers +export type NestedType = Map_ | Struct | List | FixedSizeList | Union; // <-- these types have `validity` buffer and nested childData + +/** + * * + * Main data type enumeration: + * * + * Data types in this library are all *logical*. They can be expressed as + * either a primitive physical type (bytes or bits of some fixed size), a + * nested type consisting of other data types, or another data type (e.g. a + * timestamp encoded as an int64) + */ + export const enum Type { + NONE = 0, // The default placeholder type + Null = 1, // A NULL type having no physical storage + Int = 2, // Signed or unsigned 8, 16, 32, or 64-bit little-endian integer + Float = 3, // 2, 4, or 8-byte floating point value + Binary = 4, // Variable-length bytes (no guarantee of UTF8-ness) + Utf8 = 5, // UTF8 variable-length string as List + Bool = 6, // Boolean as 1 bit, LSB bit-packed ordering + Decimal = 7, // Precision-and-scale-based decimal type. Storage type depends on the parameters. + Date = 8, // int32_t days or int64_t milliseconds since the UNIX epoch + Time = 9, // Time as signed 32 or 64-bit integer, representing either seconds, milliseconds, microseconds, or nanoseconds since midnight since midnight + Timestamp = 10, // Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond) + Interval = 11, // YEAR_MONTH or DAY_TIME interval in SQL style + List = 12, // A list of some logical data type + Struct = 13, // Struct of logical types + Union = 14, // Union of logical types + FixedSizeBinary = 15, // Fixed-size binary. Each value occupies the same number of bytes + FixedSizeList = 16, // Fixed-size list. 
Each value occupies the same number of bytes + Map = 17, // Map of named logical types + Dictionary = 'Dictionary', // Dictionary aka Category type + DenseUnion = 'DenseUnion', // Dense Union of logical types + SparseUnion = 'SparseUnion', // Sparse Union of logical types +} + +export interface DataType { + readonly TType: TType; + readonly TArray: any; + readonly TValue: any; + readonly ArrayType: any; +} + +export abstract class DataType implements Partial { + + public get [Symbol.toStringTag]() { return 'DataType'; } + + static isNull (x: DataType): x is Null { return x.TType === Type.Null; } + static isInt (x: DataType): x is Int { return x.TType === Type.Int; } + static isFloat (x: DataType): x is Float { return x.TType === Type.Float; } + static isBinary (x: DataType): x is Binary { return x.TType === Type.Binary; } + static isUtf8 (x: DataType): x is Utf8 { return x.TType === Type.Utf8; } + static isBool (x: DataType): x is Bool { return x.TType === Type.Bool; } + static isDecimal (x: DataType): x is Decimal { return x.TType === Type.Decimal; } + static isDate (x: DataType): x is Date_ { return x.TType === Type.Date; } + static isTime (x: DataType): x is Time { return x.TType === Type.Time; } + static isTimestamp (x: DataType): x is Timestamp { return x.TType === Type.Timestamp; } + static isInterval (x: DataType): x is Interval { return x.TType === Type.Interval; } + static isList (x: DataType): x is List { return x.TType === Type.List; } + static isStruct (x: DataType): x is Struct { return x.TType === Type.Struct; } + static isUnion (x: DataType): x is Union { return x.TType === Type.Union; } + static isDenseUnion (x: DataType): x is DenseUnion { return x.TType === Type.DenseUnion; } + static isSparseUnion (x: DataType): x is SparseUnion { return x.TType === Type.SparseUnion; } + static isFixedSizeBinary (x: DataType): x is FixedSizeBinary { return x.TType === Type.FixedSizeBinary; } + static isFixedSizeList (x: DataType): x is FixedSizeList { return 
x.TType === Type.FixedSizeList; } + static isMap (x: DataType): x is Map_ { return x.TType === Type.Map; } + static isDictionary (x: DataType): x is Dictionary { return x.TType === Type.Dictionary; } + + constructor(public readonly TType: TType, + public readonly children?: Field[]) {} + + acceptTypeVisitor(visitor: TypeVisitor): any { + switch (this.TType) { + case Type.Null: return DataType.isNull(this) && visitor.visitNull(this) || null; + case Type.Int: return DataType.isInt(this) && visitor.visitInt(this) || null; + case Type.Float: return DataType.isFloat(this) && visitor.visitFloat(this) || null; + case Type.Binary: return DataType.isBinary(this) && visitor.visitBinary(this) || null; + case Type.Utf8: return DataType.isUtf8(this) && visitor.visitUtf8(this) || null; + case Type.Bool: return DataType.isBool(this) && visitor.visitBool(this) || null; + case Type.Decimal: return DataType.isDecimal(this) && visitor.visitDecimal(this) || null; + case Type.Date: return DataType.isDate(this) && visitor.visitDate(this) || null; + case Type.Time: return DataType.isTime(this) && visitor.visitTime(this) || null; + case Type.Timestamp: return DataType.isTimestamp(this) && visitor.visitTimestamp(this) || null; + case Type.Interval: return DataType.isInterval(this) && visitor.visitInterval(this) || null; + case Type.List: return DataType.isList(this) && visitor.visitList(this) || null; + case Type.Struct: return DataType.isStruct(this) && visitor.visitStruct(this) || null; + case Type.Union: return DataType.isUnion(this) && visitor.visitUnion(this) || null; + case Type.FixedSizeBinary: return DataType.isFixedSizeBinary(this) && visitor.visitFixedSizeBinary(this) || null; + case Type.FixedSizeList: return DataType.isFixedSizeList(this) && visitor.visitFixedSizeList(this) || null; + case Type.Map: return DataType.isMap(this) && visitor.visitMap(this) || null; + case Type.Dictionary: return DataType.isDictionary(this) && visitor.visitDictionary(this) || null; + default: return 
null; + } + } +} + +export interface Null extends DataType { TArray: void; TValue: null; } +export class Null extends DataType { + constructor() { super(Type.Null); } + public get [Symbol.toStringTag]() { return 'Null'; } + public toString() { return `Null`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitNull(this); + } +} + +export interface Int extends DataType { TArray: TArrayType; TValue: TValueType; } +export class Int extends DataType { + // @ts-ignore + public readonly ArrayType: TypedArrayConstructor; + constructor(public readonly isSigned: boolean, + public readonly bitWidth: IntBitWidth) { + super(Type.Int); + } + public get [Symbol.toStringTag]() { return 'Int'; } + public toString() { return `${this.isSigned ? `` : `u`}int${this.bitWidth}`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { return visitor.visitInt(this); } +} + +export class Int8 extends Int { + constructor() { super(true, 8); } + public get [Symbol.toStringTag]() { return 'Int8'; } + public get ArrayType() { return Int8Array; } +} + +export class Int16 extends Int { + constructor() { super(true, 16); } + public get [Symbol.toStringTag]() { return 'Int16'; } + public get ArrayType() { return Int16Array; } +} + +export class Int32 extends Int { + constructor() { super(true, 32); } + public get [Symbol.toStringTag]() { return 'Int32'; } + public get ArrayType() { return Int32Array; } +} + +export class Int64 extends Int { + constructor() { super(true, 64); } + public get [Symbol.toStringTag]() { return 'Int64'; } + public get ArrayType() { return Int32Array; } +} + +export class Uint8 extends Int { + constructor() { super(false, 8); } + public get [Symbol.toStringTag]() { return 'Uint8'; } + public get ArrayType() { return Uint8Array; } +} + +export class Uint16 extends Int { + constructor() { super(false, 16); } + public get [Symbol.toStringTag]() { return 'Uint16'; } + public get ArrayType() { return Uint16Array; } +} + +export class Uint32 
extends Int { + constructor() { super(false, 32); } + public get [Symbol.toStringTag]() { return 'Uint32'; } + public get ArrayType() { return Uint32Array; } +} + +export class Uint64 extends Int { + constructor() { super(false, 64); } + public get [Symbol.toStringTag]() { return 'Uint64'; } + public get ArrayType() { return Uint32Array; } +} + +export interface Float extends DataType { TArray: TArrayType; TValue: number; } +export class Float extends DataType { + // @ts-ignore + public readonly ArrayType: TypedArrayConstructor; + public get [Symbol.toStringTag]() { return 'Float'; } + constructor(public readonly precision: Precision) { + super(Type.Float); + } + public toString() { return `Float precision[${this.precision}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { return visitor.visitFloat(this); } +} + +export class Float16 extends Float { + constructor() { super(Precision.HALF); } + public get [Symbol.toStringTag]() { return 'Float16'; } + public get ArrayType() { return Uint16Array; } +} + +export class Float32 extends Float { + constructor() { super(Precision.SINGLE); } + public get [Symbol.toStringTag]() { return 'Float32'; } + public get ArrayType() { return Float32Array; } +} + +export class Float64 extends Float { + constructor() { super(Precision.DOUBLE); } + public get [Symbol.toStringTag]() { return 'Float64'; } + public get ArrayType() { return Float64Array; } +} + +export interface Binary extends DataType { TArray: Uint8Array; TValue: Uint8Array; } +export class Binary extends DataType { + constructor() { super(Type.Binary); } + public get [Symbol.toStringTag]() { return 'Binary'; } + public toString() { return `Binary`; } + public get ArrayType() { return Uint8Array; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitBinary(this); + } +} + +export interface Utf8 extends DataType { TArray: Uint8Array; TValue: string; } +export class Utf8 extends DataType { + constructor() { super(Type.Utf8); } + public 
get [Symbol.toStringTag]() { return 'Utf8'; } + public toString() { return `Utf8`; } + public get ArrayType() { return Uint8Array; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitUtf8(this); + } +} + +export interface Bool extends DataType { TArray: Uint8Array; TValue: boolean; } +export class Bool extends DataType { + constructor() { super(Type.Bool); } + public get [Symbol.toStringTag]() { return 'Bool'; } + public toString() { return `Bool`; } + public get ArrayType() { return Uint8Array; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitBool(this); + } +} + +export interface Decimal extends DataType { TArray: Uint32Array; TValue: Uint32Array; } +export class Decimal extends DataType { + constructor(public readonly scale: number, + public readonly precision: number) { + super(Type.Decimal); + } + public get [Symbol.toStringTag]() { return 'Decimal'; } + public get ArrayType() { return Uint32Array; } + public toString() { return `Decimal scale[${this.scale}], precision[${this.precision}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitDecimal(this); + } +} + +/* tslint:disable:class-name */ +export interface Date_ extends DataType { TArray: Int32Array; TValue: Date; } +export class Date_ extends DataType { + constructor(public readonly unit: DateUnit) { super(Type.Date); } + public get [Symbol.toStringTag]() { return 'Date_'; } + public get ArrayType() { return Int32Array; } + public toString() { return `Date unit[${this.unit}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { return visitor.visitDate(this); } +} + +export interface Time extends DataType { TArray: Uint32Array; TValue: number; } +export class Time extends DataType { + constructor(public readonly unit: TimeUnit, + public readonly bitWidth: TimeBitWidth) { + super(Type.Time); + } + public get [Symbol.toStringTag]() { return 'Time'; } + public get ArrayType() { return Int32Array; } + public 
toString() { return `Time unit[${this.unit}], bitWidth[${this.bitWidth}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { return visitor.visitTime(this); } +} + +export interface Timestamp extends DataType { TArray: Int32Array; TValue: number; } +export class Timestamp extends DataType { + constructor(public unit: TimeUnit, public timezone?: string | null) { + super(Type.Timestamp); + } + public get [Symbol.toStringTag]() { return 'Timestamp'; } + public get ArrayType() { return Int32Array; } + public toString() { return `Timestamp unit[${this.unit}], timezone[${this.timezone}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitTimestamp(this); + } +} + +export interface Interval extends DataType { TArray: Int32Array; TValue: Int32Array; } +export class Interval extends DataType { + constructor(public unit: IntervalUnit) { + super(Type.Interval); + } + public get [Symbol.toStringTag]() { return 'Interval'; } + public get ArrayType() { return Int32Array; } + public toString() { return `Interval unit[${this.unit}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitInterval(this); + } +} + +export interface List extends DataType { TArray: any; TValue: Vector; } +export class List extends DataType { + constructor(public children: Field[]) { + super(Type.List, children); + } + public get [Symbol.toStringTag]() { return 'List'; } + public toString() { return `List`; } + public get valueType() { return this.children[0].type as T; } + public get valueField() { return this.children[0] as Field; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitList(this); + } +} + +export interface Struct extends DataType { TArray: Uint8Array; TValue: View; } +export class Struct extends DataType { + constructor(public children: Field[]) { + super(Type.Struct, children); + } + public get [Symbol.toStringTag]() { return 'Struct'; } + public toString() { return `Struct`; } + public 
acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitStruct(this); + } +} + +export interface Union extends DataType { TArray: Int8Array; TValue: any; } +export class Union extends DataType { + constructor(TType: TType, + public readonly mode: UnionMode, + public readonly typeIds: ArrowType[], + public readonly children: Field[]) { + super(TType, children); + } + public get [Symbol.toStringTag]() { return 'Union'; } + public get ArrayType() { return Int8Array; } + public toString() { return `Union mode[${this.mode}] typeIds[${this.typeIds}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { return visitor.visitUnion(this); } +} + +export class DenseUnion extends Union { + constructor(typeIds: ArrowType[], children: Field[]) { + super(Type.DenseUnion, UnionMode.Dense, typeIds, children); + } + public get [Symbol.toStringTag]() { return 'DenseUnion'; } + public toString() { return `DenseUnion typeIds[${this.typeIds}]`; } +} + +export class SparseUnion extends Union { + constructor(typeIds: ArrowType[], children: Field[]) { + super(Type.SparseUnion, UnionMode.Sparse, typeIds, children); + } + public get [Symbol.toStringTag]() { return 'SparseUnion'; } + public toString() { return `SparseUnion typeIds[${this.typeIds}]`; } +} + +export interface FixedSizeBinary extends DataType { TArray: Uint8Array; TValue: Uint8Array; } +export class FixedSizeBinary extends DataType { + constructor(public readonly byteWidth: number) { + super(Type.FixedSizeBinary); + } + public get [Symbol.toStringTag]() { return 'FixedSizeBinary'; } + public get ArrayType() { return Uint8Array; } + public toString() { return `FixedSizeBinary byteWidth[${this.byteWidth}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { return visitor.visitFixedSizeBinary(this); } +} + +export interface FixedSizeList extends DataType { TArray: any; TValue: Vector; } +export class FixedSizeList extends DataType { + constructor(public readonly listSize: number, + public readonly 
children: Field[]) { + super(Type.FixedSizeList, children); + } + public get [Symbol.toStringTag]() { return 'FixedSizeList'; } + public get valueType() { return this.children[0].type as T; } + public get valueField() { return this.children[0] as Field; } + public toString() { return `FixedSizeList listSize[${this.listSize}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { return visitor.visitFixedSizeList(this); } +} + +/* tslint:disable:class-name */ +export interface Map_ extends DataType { TArray: Uint8Array; TValue: View; } +export class Map_ extends DataType { + constructor(public readonly keysSorted: boolean, + public readonly children: Field[]) { + super(Type.Map, children); + } + public get [Symbol.toStringTag]() { return 'Map'; } + public toString() { return `Map keysSorted[${this.keysSorted}]`; } + public acceptTypeVisitor(visitor: TypeVisitor): any { return visitor.visitMap(this); } +} + +export interface Dictionary extends DataType { TArray: T['TArray']; TValue: T['TValue']; } +export class Dictionary extends DataType { + public readonly id: number; + public readonly indicies: Data; + public readonly isOrdered: boolean; + public readonly dictionary: Data; + constructor(dictionary: Data, indicies: Data, id?: Long | number | null, isOrdered?: boolean | null) { + super(Type.Dictionary); + this.indicies = indicies; // a dictionary index defaults to signed 32 bit int if unspecified + this.dictionary = dictionary; + this.isOrdered = isOrdered || false; + this.id = id == null ? + DictionaryBatch.atomicDictionaryId++ : + typeof id === 'number' ? 
id : id.low ; + } + public get [Symbol.toStringTag]() { return 'Dictionary'; } + public acceptTypeVisitor(visitor: TypeVisitor): any { + return visitor.visitDictionary(this); + } +} +export interface IterableArrayLike extends ArrayLike, Iterable {} + +export interface TypedArrayConstructor { + readonly prototype: T; + readonly BYTES_PER_ELEMENT: number; + new (length: number): T; + new (elements: Iterable): T; + new (arrayOrArrayBuffer: ArrayLike | ArrayBufferLike): T; + new (buffer: ArrayBufferLike, byteOffset: number, length?: number): T; + of(...items: number[]): T; + from(arrayLike: ArrayLike | Iterable, mapfn?: (v: number, k: number) => number, thisArg?: any): T; +} + +export type FloatArray = Uint16Array | Float32Array | Float64Array; +export type IntArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array; + +export interface TypedArray extends Iterable { + [index: number]: number; + readonly length: number; + readonly byteLength: number; + readonly byteOffset: number; + readonly buffer: ArrayBufferLike; + readonly BYTES_PER_ELEMENT: number; + [Symbol.toStringTag]: any; + [Symbol.iterator](): IterableIterator; + entries(): IterableIterator<[number, number]>; + keys(): IterableIterator; + values(): IterableIterator; + copyWithin(target: number, start: number, end?: number): this; + every(callbackfn: (value: number, index: number, array: TypedArray) => boolean, thisArg?: any): boolean; + fill(value: number, start?: number, end?: number): this; + filter(callbackfn: (value: number, index: number, array: TypedArray) => any, thisArg?: any): TypedArray; + find(predicate: (value: number, index: number, obj: TypedArray) => boolean, thisArg?: any): number | undefined; + findIndex(predicate: (value: number, index: number, obj: TypedArray) => boolean, thisArg?: any): number; + forEach(callbackfn: (value: number, index: number, array: TypedArray) => void, thisArg?: any): void; + includes(searchElement: number, fromIndex?: number): boolean; + 
indexOf(searchElement: number, fromIndex?: number): number; + join(separator?: string): string; + lastIndexOf(searchElement: number, fromIndex?: number): number; + map(callbackfn: (value: number, index: number, array: TypedArray) => number, thisArg?: any): TypedArray; + reduce(callbackfn: (previousValue: number, currentValue: number, currentIndex: number, array: TypedArray) => number): number; + reduce(callbackfn: (previousValue: number, currentValue: number, currentIndex: number, array: TypedArray) => number, initialValue: number): number; + reduce(callbackfn: (previousValue: U, currentValue: number, currentIndex: number, array: TypedArray) => U, initialValue: U): U; + reduceRight(callbackfn: (previousValue: number, currentValue: number, currentIndex: number, array: TypedArray) => number): number; + reduceRight(callbackfn: (previousValue: number, currentValue: number, currentIndex: number, array: TypedArray) => number, initialValue: number): number; + reduceRight(callbackfn: (previousValue: U, currentValue: number, currentIndex: number, array: TypedArray) => U, initialValue: U): U; + reverse(): TypedArray; + set(array: ArrayLike, offset?: number): void; + slice(start?: number, end?: number): TypedArray; + some(callbackfn: (value: number, index: number, array: TypedArray) => boolean, thisArg?: any): boolean; + sort(compareFn?: (a: number, b: number) => number): this; + subarray(begin: number, end?: number): TypedArray; + toLocaleString(): string; + toString(): string; +} diff --git a/js/src/util/bit.ts b/js/src/util/bit.ts new file mode 100644 index 0000000000000..e326f62cb67f6 --- /dev/null +++ b/js/src/util/bit.ts @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
// The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { TypedArray } from '../type';

/**
 * Round `value` up to the next multiple of `alignment`.
 * A value that is already a multiple of `alignment` is returned unchanged.
 * @param value The number to align
 * @param alignment The multiple to align to
 */
export function align(value: number, alignment: number) {
    return value + padding(value, alignment);
}

/**
 * Compute how much must be added to `value` to reach the next multiple of `alignment`.
 * @param value The number to pad
 * @param alignment The multiple to pad up to
 * @returns 0 when `value` is already a multiple of `alignment`
 */
export function padding(value: number, alignment: number) {
    return (value % alignment === 0 ? 0 : alignment - value % alignment);
}

/**
 * Test a single bit of `byte`. The first two arguments are ignored; they exist
 * so the function has the shape of the `get` callback taken by `iterateBits`.
 * @param byte The byte to inspect
 * @param bit The bit position inside `byte` (0 is the least-significant bit)
 * @returns true when the bit is set
 */
export function getBool(_data: any, _index: number, byte: number, bit: number) {
    return (byte & 1 << bit) !== 0;
}

/**
 * Read a single bit of `byte` as a number. The first two arguments are ignored; they
 * exist so the function has the shape of the `get` callback taken by `iterateBits`.
 * @param byte The byte to inspect
 * @param bit The bit position inside `byte` (0 is the least-significant bit)
 * @returns 1 when the bit is set, otherwise 0
 */
export function getBit(_data: any, _index: number, byte: number, bit: number): 0 | 1 {
    return (byte & 1 << bit) >> bit as (0 | 1);
}

/**
 * Lazily walk `length` consecutive bits of `bytes`, starting at bit `begin`, and yield
 * whatever `get` returns for each of them. Bits are numbered from the least-significant
 * bit of each byte, so bit `i` lives in `bytes[i >> 3]` at position `i % 8`.
 * @param bytes The bytes holding the bits
 * @param begin The position of the first bit to visit
 * @param length The number of bits to visit
 * @param context An opaque value forwarded to `get` as its first argument
 * @param get Called once per visited bit with `(context, index, byte, bit)`, where `index`
 *            is the bit's position counted from the start of `bytes` (the first call receives
 *            `begin`), `byte` is the byte containing it, and `bit` is its position in that byte
 */
export function* iterateBits<T>(bytes: Uint8Array, begin: number, length: number, context: any,
                                get: (context: any, index: number, byte: number, bit: number) => T) {
    let bit = begin % 8;
    let byteIndex = begin >> 3;
    let index = begin;
    let remaining = length;
    while (remaining > 0) {
        const byte = bytes[byteIndex++];
        // Visit every requested bit of this byte, including the one at `bit` itself,
        // and count each visited bit against `remaining` individually.
        for (; bit < 8 && remaining > 0; ++bit, --remaining) {
            yield get(context, index++, byte, bit);
        }
        // Every byte after the first is read from its least-significant bit.
        bit = 0;
    }
}

/**
 * Compute the population count (the number of bits set to 1) for a range of bits in a Uint8Array.
 * @param data The Uint8Array of bits for which to compute the population count.
 * @param lhs The range's left-hand side (or start) bit, inclusive
 * @param rhs The range's right-hand side (or end) bit, exclusive
 * @returns The number of set bits in the range, or 0 when the range is empty
 */
export function popcnt_bit_range(data: Uint8Array, lhs: number, rhs: number): number {
    if (rhs - lhs <= 0) { return 0; }
    // If the bit range is less than one byte, sum the 1 bits in the bit range
    if (rhs - lhs < 8) {
        let sum = 0;
        for (const bit of iterateBits(data, lhs, rhs - lhs, data, getBit)) {
            sum += bit;
        }
        return sum;
    }
    // Get the next lowest multiple of 8 from the right hand side
    const rhsInside = rhs >> 3 << 3;
    // Get the next highest multiple of 8 from the left hand side
    const lhsInside = lhs + (lhs % 8 === 0 ? 0 : 8 - lhs % 8);
    return (
        // Get the popcnt of bits between the left hand side, and the next highest multiple of 8
        popcnt_bit_range(data, lhs, lhsInside) +
        // Get the popcnt of bits between the right hand side, and the next lowest multiple of 8
        popcnt_bit_range(data, rhsInside, rhs) +
        // Get the popcnt of all bits between the left and right hand sides' multiples of 8
        popcnt_array(data, lhsInside >> 3, (rhsInside - lhsInside) >> 3)
    );
}

/**
 * Count the set bits in a run of whole bytes of a typed array.
 * @param arr The typed array whose bytes are counted
 * @param byteOffset The first byte to count, relative to the start of `arr`'s own view
 *                   of its buffer; defaults to 0
 * @param byteLength The number of bytes to count; when omitted, counting stops at
 *                   `arr.byteLength`
 * @returns The number of bits set to 1 in the selected bytes
 */
export function popcnt_array(arr: TypedArray, byteOffset?: number, byteLength?: number) {
    // `| 0` turns an omitted byteOffset into 0
    let cnt = 0, pos = byteOffset! | 0;
    const view = new DataView(arr.buffer, arr.byteOffset, arr.byteLength);
    const len = byteLength === void 0 ? arr.byteLength : pos + byteLength;
    // Consume the bytes in the widest chunks available; the byte order used by the
    // DataView reads does not matter because only the number of set bits is kept.
    while (len - pos >= 4) {
        cnt += popcnt_uint32(view.getUint32(pos));
        pos += 4;
    }
    while (len - pos >= 2) {
        cnt += popcnt_uint32(view.getUint16(pos));
        pos += 2;
    }
    while (len - pos >= 1) {
        cnt += popcnt_uint32(view.getUint8(pos));
        pos += 1;
    }
    return cnt;
}

/**
 * Count the bits set to 1 in a 32-bit integer.
 * @param uint32 The value to count; it is coerced to a 32-bit integer first
 * @returns A number between 0 and 32
 */
export function popcnt_uint32(uint32: number): number {
    let i = uint32 | 0;
    // Sum adjacent bits into 2-bit fields, then 2-bit fields into 4-bit fields
    i = i - ((i >>> 1) & 0x55555555);
    i = (i & 0x33333333) + ((i >>> 2) & 0x33333333);
    // Sum 4-bit fields into bytes, then add the four bytes together into the top byte
    return (((i + (i >>> 4)) & 0x0F0F0F0F) * 0x01010101) >>> 24;
}

// ---- patch metadata, retained verbatim ----
// diff --git a/js/src/util/layout.ts b/js/src/util/layout.ts
// index c064ee9d7d0b0..5f62f4fd9516f 100644
// --- a/js/src/util/layout.ts
// +++ b/js/src/util/layout.ts
// @@ -16,7 +16,7 @@
//  // under the License.
//  import { TextEncoder } from 'text-encoding-utf-8';
// -import { TypedArrayConstructor, TypedArray } from '../vector/types';
// +import { TypedArrayConstructor, TypedArray } from '../type';
//  export function align(value: number, alignment: number) {
//      return value + padding(value, alignment);
// diff --git a/js/src/vector.ts b/js/src/vector.ts
// new file mode 100644
// index 0000000000000..84bdf7b8b63fb
// --- /dev/null
// +++ b/js/src/vector.ts
// @@ -0,0 +1,257 @@

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+ +import { Data } from './data'; +import { VisitorNode, TypeVisitor, VectorVisitor } from './visitor'; +import { DataType, ListType, FlatType, NestedType } from './type'; +import { IterableArrayLike, Precision, DateUnit, IntervalUnit, UnionMode } from './type'; + +export interface VectorLike { length: number; nullCount: number; } + +export interface View { + isValid(index: number): boolean; + get(index: number): T['TValue'] | null; + toArray(): IterableArrayLike; + [Symbol.iterator](): IterableIterator; +} + +export class Vector implements VectorLike, View, Partial { + // @ts-ignore + protected _data: Data; + // @ts-ignore + protected _view: View; + constructor(data: Data, view: View) { + this._view = view; + const nullBitmap = (this._data = data).nullBitmap; + if (nullBitmap && nullBitmap.length > 0 && data.nullCount > 0) { + this._view = new ValidityView(data, this._view); + } + } + + public get data() { return this._data; } + public get type() { return this._data.type; } + public get length() { return this._data.length; } + public get nullCount() { return this._data.nullCount; } + public get nullBitmap() { return this._data.nullBitmap; } + public get [Symbol.toStringTag]() { return `Vector<${this.type[Symbol.toStringTag]}>`; } + + public isValid(index: number): boolean { return this._view.isValid(index); } + public get(index: number): T['TValue'] | null { return this._view.get(index); } + public toArray(): IterableArrayLike { return this._view.toArray(); } + public [Symbol.iterator](): IterableIterator { return this._view[Symbol.iterator](); } + public slice(begin?: number, end?: number): this { + let total = this.length, from = begin || 0; + let to = typeof end === 'number' ? end : total; + if (to < 0) { to = total + to; } + if (from < 0) { from = total - (from * -1) % total; } + if (to < from) { from = to; to = begin || 0; } + total = !isFinite(total = (to - from)) || total < 0 ? 
0 : total; + const data = this._data.slice(from, Math.min(total, this.length)); + return new (this.constructor as any)(data, this._view) as this; + } + + public acceptTypeVisitor(visitor: TypeVisitor): any { + return TypeVisitor.visitTypeInline(visitor, this.type); + } + public acceptVectorVisitor(visitor: VectorVisitor): any { + return VectorVisitor.visitTypeInline(visitor, this.type, this); + } +} + +export abstract class FlatVector extends Vector { + public get values() { return this._data.values; } +} + +export abstract class ListVectorBase extends Vector { + public get values() { return this._data.values; } + public get valueOffsets() { return this._data.valueOffsets; } + public getValueOffset(index: number) { + return this.data.valueOffsets[index]; + } + public getValueLength(index: number) { + return this.data.valueOffsets[index + 1] - this.data.valueOffsets[index]; + } +} + +export abstract class NestedVector extends Vector { + // @ts-ignore + protected _view: NestedView; + public get childData(): Data[] { + return this.data.childData; + } + public getChildAt(index: number) { + return this._view.getChildAt(index); + } +} + +import { List, Binary, Utf8, Bool, } from './type'; +import { Null, Int, Float, Float16, Decimal, Date_, Time, Timestamp, Interval } from './type'; +import { Struct, Union, SparseUnion, DenseUnion, FixedSizeBinary, FixedSizeList, Map_, Dictionary } from './type'; + +import { DictionaryView } from './vector/dictionary'; +import { ListView, FixedSizeListView, BinaryView, Utf8View } from './vector/list'; +import { UnionView, DenseUnionView, NestedView, StructView, MapView } from './vector/nested'; +import { FlatView, NullView, BoolView, ValidityView, FixedSizeView, Float16View, DateDayView, DateMillisecondView, IntervalYearMonthView } from './vector/flat'; + +export class NullVector extends Vector { + constructor(data: Data) { + super(data, new NullView(data)); + } +} + +export class BoolVector extends Vector { + constructor(data: Data) { + 
super(data, new BoolView(data)); + } +} + +export class IntVector extends FlatVector> { + constructor(data: Data, view: View = IntVector.viewForBitWidth(data)) { + super(data, view); + } + static viewForBitWidth(data: Data) { + return data.type.bitWidth <= 32 ? new FlatView(data) : new FixedSizeView(data, (data.type.bitWidth / 32) | 0); + } +} + +export class FloatVector extends FlatVector> { + constructor(data: Data) { + super(data, data.type.precision !== Precision.HALF ? + new FlatView(data) : + new Float16View(data as Data)); + } +} + +export class DateVector extends FlatVector { + constructor(data: Data) { + super(data, data.type.unit === DateUnit.DAY ? new DateDayView(data) : new DateMillisecondView(data, 2)); + } +} + +export class DecimalVector extends FlatVector { + constructor(data: Data) { + super(data, new FixedSizeView(data, 4)); + } +} + +export class TimeVector extends FlatVector