Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-40891: [JS] Store Dates as TimestampMillisecond #40892

Merged
merged 4 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions js/src/factories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export function makeBuilder<T extends dtypes.DataType = any, TNull = any>(option
export function vectorFromArray(values: readonly (null | undefined)[], type?: dtypes.Null): Vector<dtypes.Null>;
export function vectorFromArray(values: readonly (null | undefined | boolean)[], type?: dtypes.Bool): Vector<dtypes.Bool>;
export function vectorFromArray<T extends dtypes.Utf8 | dtypes.Dictionary<dtypes.Utf8> = dtypes.Dictionary<dtypes.Utf8, dtypes.Int32>>(values: readonly (null | undefined | string)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Date_>(values: readonly (null | undefined | Date)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.TimestampMillisecond>(values: readonly (null | undefined | Date)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Int>(values: readonly (null | undefined | number)[], type: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Int64 | dtypes.Uint64 = dtypes.Int64>(values: readonly (null | undefined | bigint)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Float = dtypes.Float64>(values: readonly (null | undefined | number)[], type?: T): Vector<T>;
Expand Down Expand Up @@ -145,7 +145,7 @@ function inferType(value: readonly unknown[]): dtypes.DataType {
} else if (booleansCount + nullsCount === value.length) {
return new dtypes.Bool;
} else if (datesCount + nullsCount === value.length) {
return new dtypes.DateMillisecond;
return new dtypes.TimestampMillisecond;
} else if (arraysCount + nullsCount === value.length) {
const array = value as Array<unknown>[];
const childType = inferType(array[array.findIndex((ary) => ary != null)]);
Expand Down
14 changes: 13 additions & 1 deletion js/src/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,19 @@ export class Date_<T extends Dates = Dates> extends DataType<T> {

/**
 * Date type constructed with `DateUnit.DAY` as its unit.
 *
 * @ignore
 */
export class DateDay extends Date_<Type.DateDay> {
    constructor() {
        super(DateUnit.DAY);
    }
}
/** @ignore */
/**
 * Date type backed by a signed 64-bit count of milliseconds elapsed since the UNIX epoch (1970-01-01).
 *
 * Per the specification, a value denotes a whole number of days expressed in milliseconds, so every
 * value must be an exact multiple of `86_400_000` (the number of milliseconds in a standard day).
 * This library does not validate that constraint, for performance and usability reasons.
 *
 * Prefer {@link DateDay} when the intent is to represent a number of days, and
 * {@link TimestampMillisecond} when storing JS dates.
 *
 * @ignore
 */
export class DateMillisecond extends Date_<Type.DateMillisecond> {
    constructor() {
        super(DateUnit.MILLISECOND);
    }
}

/** @ignore */
Expand Down
19 changes: 14 additions & 5 deletions js/test/unit/vector/date-vector-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,19 @@
// specific language governing permissions and limitations
// under the License.

import { DateDay, DateMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow';
import { DateDay, DateMillisecond, TimestampMillisecond, RecordBatchReader, Table, vectorFromArray } from 'apache-arrow';

// Plain JS `Date` inputs passed to `vectorFromArray` without an explicit type are expected to
// produce a TimestampMillisecond vector.
describe(`TimestampVector`, () => {
    test(`Dates are stored in TimestampMillisecond`, () => {
        const instant = new Date('2023-02-01T12:34:56Z');
        const vector = vectorFromArray([instant]);
        const expectedMillis = instant.valueOf();

        expect(vector.type).toBeInstanceOf(TimestampMillisecond);
        // The element is compared against the epoch-millisecond number, not against a Date object.
        expect(vector.get(0)).toBe(expectedMillis);
    });
});

describe(`DateVector`, () => {
it('returns days since the epoch as correct JS Dates', () => {
test(`returns days since the epoch as correct JS Dates`, () => {
const table = new Table(RecordBatchReader.from(test_data));
const expectedMillis = expectedMillis32();
const date32 = table.getChildAt<DateDay>(0)!;
Expand All @@ -28,7 +37,7 @@ describe(`DateVector`, () => {
}
});

it('returns millisecond longs since the epoch as correct JS Dates', () => {
test(`returns millisecond longs since the epoch as correct JS Dates`, () => {
const table = new Table(RecordBatchReader.from(test_data));
const expectedMillis = expectedMillis64();
const date64 = table.getChildAt<DateMillisecond>(1)!;
Expand All @@ -38,9 +47,9 @@ describe(`DateVector`, () => {
}
});

it('returns the same date that was in the vector', () => {
test(`returns the same date that was in the vector`, () => {
const dates = [new Date(1950, 1, 0)];
const vec = vectorFromArray(dates);
const vec = vectorFromArray(dates, new DateMillisecond());
for (const date of vec) {
expect(date).toEqual(dates.shift());
}
Expand Down
Loading