Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-40959: [JS] Store Timestamps in 64 bits #40960

Merged
merged 19 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions js/src/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,16 +333,28 @@ export class Decimal extends DataType<Type.Decimal> {
/** @ignore */
export type Dates = Type.Date | Type.DateDay | Type.DateMillisecond;
/** @ignore */
export interface Date_<T extends Dates = Dates> extends DataType<T> { TArray: Int32Array; TValue: Date; ArrayType: TypedArrayConstructor<Int32Array> }
type DateType = {
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
[Type.Date]: { TArray: Int32Array | BigInt64Array };
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
[Type.DateDay]: { TArray: Int32Array };
[Type.DateMillisecond]: { TArray: BigInt64Array };
};
/** @ignore */
// Type-level half of Date_: declares the associated array/value types for a date type id T.
export interface Date_<T extends Dates = Dates> extends DataType<T> {
// Backing typed-array type, looked up for T in the DateType map.
TArray: DateType[T]['TArray'];
// Individual date values are surfaced as plain numbers.
TValue: number;
}
/** @ignore */
export class Date_<T extends Dates = Dates> extends DataType<T> {
constructor(public readonly unit: DateUnit) {
super(Type.Date as T);
trxcllnt marked this conversation as resolved.
Show resolved Hide resolved
}
public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; }

public get ArrayType() {
return this.unit === DateUnit.DAY ? Int32Array : BigInt64Array;
}
protected static [Symbol.toStringTag] = ((proto: Date_) => {
(<any>proto).unit = null;
(<any>proto).ArrayType = Int32Array;
return proto[Symbol.toStringTag] = 'Date';
})(Date_.prototype);
}
Expand Down Expand Up @@ -417,9 +429,9 @@ export class TimeNanosecond extends Time_<Type.TimeNanosecond> { constructor() {
type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond;
/** @ignore */
interface Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
TArray: Int32Array;
TArray: BigInt64Array;
TValue: number;
ArrayType: TypedArrayConstructor<Int32Array>;
ArrayType: BigIntArrayConstructor<BigInt64Array>;
}

/** @ignore */
Expand All @@ -432,7 +444,7 @@ class Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
protected static [Symbol.toStringTag] = ((proto: Timestamp_) => {
(<any>proto).unit = null;
(<any>proto).timezone = null;
(<any>proto).ArrayType = Int32Array;
(<any>proto).ArrayType = BigInt64Array;
return proto[Symbol.toStringTag] = 'Timestamp';
})(Timestamp_.prototype);
}
Expand Down Expand Up @@ -483,7 +495,7 @@ type Durations = Type.Duration | Type.DurationSecond | Type.DurationMillisecond
export interface Duration<T extends Durations = Durations> extends DataType<T> {
TArray: BigInt64Array;
TValue: bigint;
ArrayType: BigInt64Array;
ArrayType: BigIntArrayConstructor<BigInt64Array>;
}

/** @ignore */
Expand Down Expand Up @@ -737,8 +749,6 @@ export function strideForType(type: DataType) {
const t: any = type;
switch (type.typeId) {
case Type.Decimal: return (type as Decimal).bitWidth / 32;
case Type.Timestamp: return 2;
case Type.Date: return 1 + (t as Date_).unit;
case Type.Interval: return 1 + (t as Interval_).unit;
// case Type.Int: return 1 + +((t as Int_).bitWidth > 32);
// case Type.Time: return 1 + +((t as Time_).bitWidth > 32);
Expand Down
13 changes: 13 additions & 0 deletions js/src/util/bigint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,16 @@ export function bigIntToNumber(number: bigint | number): number {
}
return Number(number);
}

/**
 * Divides one bigint by another and returns the quotient as a number,
 * including the fractional part.
 * Plain bigint division truncates and drops the remainder, so the remainder
 * is converted separately and added back as a fraction of the divisor.
 * Assumes that the whole part, the remainder and the divisor each fit into a number.
 *
 * @param number The number to divide.
 * @param divisor The divisor.
 * @returns The result of the division as a number.
 */
export function divideBigInts(number: bigint, divisor: bigint): number {
    const wholePart = bigIntToNumber(number / divisor);
    const remainder = bigIntToNumber(number % divisor);
    return wholePart + remainder / bigIntToNumber(divisor);
}
21 changes: 7 additions & 14 deletions js/src/visitor/get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { Vector } from '../vector.js';
import { Visitor } from '../visitor.js';
import { MapRow } from '../row/map.js';
import { StructRow, StructRowProxy } from '../row/struct.js';
import { bigIntToNumber } from '../util/bigint.js';
import { bigIntToNumber, divideBigInts } from '../util/bigint.js';
import { decodeUtf8 } from '../util/utf8.js';
import { TypeToDataType } from '../interfaces.js';
import { uint16ToFloat64 } from '../util/math.js';
Expand Down Expand Up @@ -106,13 +106,6 @@ function wrapGet<T extends DataType>(fn: (data: Data<T>, _1: any) => any) {
}

/**
 * Converts the day count stored at `index` into milliseconds.
 * @ignore
 */
const epochDaysToMs = (data: Int32Array, index: number) => {
    const MS_PER_DAY = 86400000;
    return MS_PER_DAY * data[index];
};
/** @ignore */const epochMillisecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1]) + (data[index] >>> 0);
/** @ignore */const epochMicrosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000) + ((data[index] >>> 0) / 1000);
/** @ignore */const epochNanosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000000) + ((data[index] >>> 0) / 1000000);

/** @ignore */const epochMillisecondsToDate = (epochMs: number) => new Date(epochMs);
/** @ignore */const epochDaysToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochDaysToMs(data, index));
/** @ignore */const epochMillisecondsLongToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochMillisecondsLongToMs(data, index));

/** @ignore */
const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue'] => null;
Expand All @@ -139,9 +132,9 @@ type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Floa
type Numeric2X = Int64 | Uint64;

/** @ignore */
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => epochDaysToDate(values, index);
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => epochDaysToMs(values, index);
/** @ignore */
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => epochMillisecondsLongToDate(values, index * 2);
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => bigIntToNumber(values[index]);
/** @ignore */
const getNumeric = <T extends Numeric1X>({ stride, values }: Data<T>, index: number): T['TValue'] => values[stride * index];
/** @ignore */
Expand Down Expand Up @@ -178,13 +171,13 @@ const getDate = <T extends Date_>(data: Data<T>, index: number): T['TValue'] =>
);

/** @ignore */
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => 1000 * epochMillisecondsLongToMs(values, index * 2);
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => 1000 * bigIntToNumber(values[index]);
/** @ignore */
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => epochMillisecondsLongToMs(values, index * 2);
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => bigIntToNumber(values[index]);
/** @ignore */
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => epochMicrosecondsLongToMs(values, index * 2);
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => divideBigInts(values[index], BigInt(1000));
/** @ignore */
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => epochNanosecondsLongToMs(values, index * 2);
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => divideBigInts(values[index], BigInt(1000000));
/* istanbul ignore next */
/** @ignore */
const getTimestamp = <T extends Timestamp>(data: Data<T>, index: number): T['TValue'] => {
Expand Down
9 changes: 5 additions & 4 deletions js/src/visitor/iterator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,11 @@ function vectorIterator<T extends DataType>(vector: Vector<T>): IterableIterator

// Fast case, defer to native iterators if possible
if (vector.nullCount === 0 && vector.stride === 1 && (
(type.typeId === Type.Timestamp) ||
(type instanceof Int && (type as Int).bitWidth !== 64) ||
(type instanceof Time && (type as Time).bitWidth !== 64) ||
(type instanceof Float && (type as Float).precision !== Precision.HALF)
// Don't defer to native iterator for timestamps since Numbers are expected
// (DataType.isTimestamp(type)) && type.unit === TimeUnit.MILLISECOND ||
(DataType.isInt(type) && type.bitWidth !== 64) ||
(DataType.isTime(type) && type.bitWidth !== 64) ||
(DataType.isFloat(type) && type.precision !== Precision.HALF)
)) {
return new ChunkedIterator(vector.data.length, (chunkIndex) => {
const data = vector.data[chunkIndex];
Expand Down
25 changes: 5 additions & 20 deletions js/src/visitor/set.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,6 @@ function wrapSet<T extends DataType>(fn: (data: Data<T>, _1: any, _2: any) => vo

/**
 * Writes `epochMs` into `data[index]` as a whole number of days, rounding toward negative infinity.
 * @ignore
 */
export const setEpochMsToDays = (data: Int32Array, index: number, epochMs: number) => {
    const MS_PER_DAY = 86400000;
    const wholeDays = Math.floor(epochMs / MS_PER_DAY);
    data[index] = wholeDays;
};
/** @ignore */
export const setEpochMsToMillisecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor(epochMs % 4294967296);
data[index + 1] = Math.floor(epochMs / 4294967296);
};
/** @ignore */
export const setEpochMsToMicrosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor((epochMs * 1000) % 4294967296);
data[index + 1] = Math.floor((epochMs * 1000) / 4294967296);
};
/** @ignore */
export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor((epochMs * 1000000) % 4294967296);
data[index + 1] = Math.floor((epochMs * 1000000) / 4294967296);
};

/** @ignore */
export const setVariableWidthBytes = <T extends Int32Array | BigInt64Array>(values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
Expand Down Expand Up @@ -161,7 +146,7 @@ export const setAnyFloat = <T extends Float>(data: Data<T>, index: number, value
/** @ignore */
export const setDateDay = <T extends DateDay>({ values }: Data<T>, index: number, value: T['TValue']): void => { setEpochMsToDays(values, index, value.valueOf()); };
/** @ignore */
export const setDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { setEpochMsToMillisecondsLong(values, index * 2, value.valueOf()); };
export const setDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value); };
/** @ignore */
export const setFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }: Data<T>, index: number, value: T['TValue']): void => { values.set(value.subarray(0, stride), stride * index); };

Expand All @@ -178,13 +163,13 @@ export const setDate = <T extends Date_>(data: Data<T>, index: number, value: T[
};

/** @ignore */
export const setTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value / 1000);
export const setTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value / 1000); };
/** @ignore */
export const setTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value);
export const setTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value); };
/** @ignore */
export const setTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value);
export const setTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value * 1000); };
/** @ignore */
export const setTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value);
export const setTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value * 1000000); };
/* istanbul ignore next */
/** @ignore */
export const setTimestamp = <T extends Timestamp>(data: Data<T>, index: number, value: T['TValue']): void => {
Expand Down
53 changes: 24 additions & 29 deletions js/test/generate-test-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -402,10 +402,7 @@ function generateDate<T extends Date_>(this: TestDataVectorGenerator, type: T, l
const data = type.unit === DateUnit.DAY
? createDate32(length, nullBitmap, values)
: createDate64(length, nullBitmap, values);
return {
values: () => values.map((x) => x == null ? null : new Date(x)),
vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })])
};
return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) };
}

function generateTimestamp<T extends Timestamp>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector<T> {
Expand Down Expand Up @@ -649,6 +646,7 @@ type TypedArrayConstructor =


const rand = Math.random.bind(Math);
const randSign = () => rand() > 0.5 ? -1 : 1;
const randomBytes = (length: number) => fillRandom(Uint8Array, length);

const memoize = (fn: () => any) => ((x?: any) => () => x || (x = fn()))();
Expand All @@ -661,15 +659,15 @@ function fillRandom<T extends TypedArrayConstructor>(ArrayType: T, length: numbe
const BPE = ArrayType.BYTES_PER_ELEMENT;
const array = new ArrayType(length);
const max = (2 ** (8 * BPE)) - 1;
for (let i = -1; ++i < length; array[i] = rand() * max * (rand() > 0.5 ? -1 : 1));
for (let i = -1; ++i < length; array[i] = rand() * max * randSign());
return array as InstanceType<T>;
}

function fillRandomBigInt<T extends (typeof BigInt64Array) | (typeof BigUint64Array)>(ArrayType: T, length: number) {
const BPE = ArrayType.BYTES_PER_ELEMENT;
const array = new ArrayType(length);
const max = (2 ** (8 * BPE)) - 1;
for (let i = -1; ++i < length; array[i] = BigInt(rand() * max * (rand() > 0.5 ? -1 : 1)));
for (let i = -1; ++i < length; array[i] = BigInt(rand() * max * randSign()));
return array as InstanceType<T>;
}

Expand Down Expand Up @@ -735,47 +733,44 @@ function createVariableWidthBytes(length: number, nullBitmap: Uint8Array, offset
return bytes;
}

/**
* Creates timestamps with the accuracy of days (86400000 milliseconds).
*/
function createDate32(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
const data = new Int32Array(length).fill(Math.trunc(Date.now() / 86400000));
iterateBitmap(length, nullBitmap, (i, valid) => {
if (!valid) {
data[i] = 0;
values[i] = null;
} else {
data[i] = Math.trunc(data[i] + (rand() * 10000 * (rand() > 0.5 ? -1 : 1)));
data[i] = Math.trunc(data[i] + (rand() * 10000 * randSign()));
values[i] = data[i] * 86400000;
}
});
return data;
}

function createDate64(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
const data = new Int32Array(length * 2).fill(0);
const data32 = createDate32(length, nullBitmap, values);
iterateBitmap(length, nullBitmap, (i, valid) => {
if (valid) {
const value = data32[i] * 86400000;
const hi = Math.trunc(value / 4294967296);
const lo = Math.trunc(value - 4294967296 * hi);
values[i] = value;
data[i * 2 + 0] = lo;
data[i * 2 + 1] = hi;
}
});
return data;
return BigInt64Array.from(data32, x => BigInt(x * 86400000));
}

/**
 * Divides two bigints and returns the result as a number that keeps the
 * fractional part: the truncated quotient plus the remainder over the divisor.
 */
function divideBigInts(number: bigint, divisor: bigint): number {
    const wholePart = Number(number / divisor);
    const remainder = Number(number % divisor);
    return wholePart + remainder / Number(divisor);
}

function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) {
const mult = 86400 * multiple;
const data = new Int32Array(length * 2).fill(0);
const data32 = createDate32(length, nullBitmap, values);
const data = new BigInt64Array(length).fill(0n);
const tenYears = 10 * 365 * 24 * 60 * 60 * multiple;
const now = Math.trunc(Date.now() / 1000 * multiple);
iterateBitmap(length, nullBitmap, (i, valid) => {
if (valid) {
const value = data32[i] * mult;
const hi = Math.trunc(value / 4294967296);
const lo = Math.trunc(value - 4294967296 * hi);
data[i * 2 + 0] = lo;
data[i * 2 + 1] = hi;
if (!valid) {
data[i] = 0n;
values[i] = null;
} else {
const value = BigInt(now + Math.trunc(rand() * randSign() * tenYears));
data[i] = value;
values[i] = divideBigInts(value * 1000n, BigInt(multiple));
}
});
return data;
Expand All @@ -788,7 +783,7 @@ function createTime32(length: number, nullBitmap: Uint8Array, multiple: number,
data[i] = 0;
values[i] = null;
} else {
values[i] = data[i] = ((1000 * rand()) | 0 * multiple) * (rand() > 0.5 ? -1 : 1);
values[i] = data[i] = ((1000 * rand()) | 0 * multiple) * randSign();
}
});
return data;
Expand Down
Loading
Loading