Skip to content

Commit

Permalink
Add DateTime.buildFormatParser and DateTime.fromFormatParser (#1582)
Browse files Browse the repository at this point in the history
This allows constructing a parser for a locale/format and reusing it
when parsing dates. Without this, DateTime.fromFormat constructs a new
parser on every call. When parsing large amounts of date strings, this
gets rather slow.

In benchmarks, this speeds up parsing by 4.4x.
  • Loading branch information
schleyfox authored Mar 9, 2024
1 parent 6b0ec2e commit 69032e6
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 18 deletions.
10 changes: 10 additions & 0 deletions benchmarks/datetime.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ function runDateTimeSuite() {

const dt = DateTime.now();

const formatParser = DateTime.buildFormatParser("yyyy/MM/dd HH:mm:ss.SSS");

suite
.add("DateTime.local", () => {
DateTime.now();
Expand All @@ -32,6 +34,14 @@ function runDateTimeSuite() {
zone: "America/Los_Angeles",
});
})
.add("DateTime.fromFormatParser", () => {
DateTime.fromFormatParser("1982/05/25 09:10:11.445", formatParser);
})
.add("DateTime.fromFormatParser with zone", () => {
DateTime.fromFormatParser("1982/05/25 09:10:11.445", formatParser, {
zone: "America/Los_Angeles",
});
})
.add("DateTime#setZone", () => {
dt.setZone("America/Los_Angeles");
})
Expand Down
69 changes: 69 additions & 0 deletions src/datetime.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
explainFromTokens,
formatOptsToTokens,
expandMacroTokens,
TokenParser,
} from "./impl/tokenParser.js";
import {
gregorianToWeek,
Expand Down Expand Up @@ -2233,6 +2234,74 @@ export default class DateTime {
return DateTime.fromFormatExplain(text, fmt, options);
}

/**
* Build a parser for `fmt` using the given locale. This parser can be passed
* to {@link DateTime.fromFormatParser} to a parse a date in this format. This
* can be used to optimize cases where many dates need to be parsed in a
* specific format.
*
* @param {String} fmt - the format the string is expected to be in (see
* description)
* @param {Object} options - options used to set locale and numberingSystem
* for parser
* @returns {TokenParser} - opaque object to be used
*/
static buildFormatParser(fmt, options = {}) {
const { locale = null, numberingSystem = null } = options,
localeToUse = Locale.fromOpts({
locale,
numberingSystem,
defaultToEN: true,
});
return new TokenParser(localeToUse, fmt);
}

/**
* Create a DateTime from an input string and format parser.
*
* The format parser must have been created with the same locale as this call.
*
* @param {String} text - the string to parse
* @param {TokenParser} formatParser - parser from {@link DateTime.buildFormatParser}
* @param {Object} opts - options taken by fromFormat()
* @returns {DateTime}
*/
static fromFormatParser(text, formatParser, opts = {}) {
if (isUndefined(text) || isUndefined(formatParser)) {
throw new InvalidArgumentError(
"fromFormatParser requires an input string and a format parser"
);
}
const { locale = null, numberingSystem = null } = opts,
localeToUse = Locale.fromOpts({
locale,
numberingSystem,
defaultToEN: true,
});

if (!localeToUse.equals(formatParser.locale)) {
throw new InvalidArgumentError(
`fromFormatParser called with a locale of ${localeToUse}, ` +
`but the format parser was created for ${formatParser.locale}`
);
}

const { result, zone, specificOffset, invalidReason } = formatParser.explainFromTokens(text);

if (invalidReason) {
return DateTime.invalid(invalidReason);
} else {
return parseDataToDateTime(
result,
zone,
opts,
`format ${formatParser.format}`,
text,
specificOffset
);
}
}

// FORMAT PRESETS

/**
Expand Down
4 changes: 4 additions & 0 deletions src/impl/locale.js
Original file line number Diff line number Diff line change
Expand Up @@ -539,4 +539,8 @@ export default class Locale {
this.outputCalendar === other.outputCalendar
);
}

toString() {
return `Locale(${this.locale}, ${this.numberingSystem}, ${this.outputCalendar})`;
}
}
68 changes: 50 additions & 18 deletions src/impl/tokenParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -432,27 +432,59 @@ export function expandMacroTokens(tokens, locale) {
* @private
*/

export function explainFromTokens(locale, input, format) {
const tokens = expandMacroTokens(Formatter.parseFormat(format), locale),
units = tokens.map((t) => unitForToken(t, locale)),
disqualifyingUnit = units.find((t) => t.invalidReason);
/**
 * Parser for a single format string in a single locale. The format is
 * tokenized and its matching regex compiled once, in the constructor, so the
 * same instance can be reused for many explainFromTokens() calls.
 */
export class TokenParser {
/**
 * @param {Locale} locale - locale used to expand macro tokens and build units
 * @param {string} format - the format string to parse against
 */
constructor(locale, format) {
this.locale = locale;
this.format = format;
this.tokens = expandMacroTokens(Formatter.parseFormat(format), locale);
this.units = this.tokens.map((t) => unitForToken(t, locale));
// First unit carrying an invalidReason, if any; it marks the whole parser invalid.
this.disqualifyingUnit = this.units.find((t) => t.invalidReason);

// The regex and handlers are only built when every unit is valid.
if (!this.disqualifyingUnit) {
const [regexString, handlers] = buildRegex(this.units);
this.regex = RegExp(regexString, "i");
this.handlers = handlers;
}
}

// NOTE(review): the lines from here down to the closing `);` of the
// ConflictingSpecificationError appear to be removed lines of the previous
// free-function implementation, interleaved by the diff view — confirm
// against the repository.
if (disqualifyingUnit) {
return { input, tokens, invalidReason: disqualifyingUnit.invalidReason };
} else {
const [regexString, handlers] = buildRegex(units),
regex = RegExp(regexString, "i"),
[rawMatches, matches] = match(input, regex, handlers),
[result, zone, specificOffset] = matches
? dateTimeFromMatches(matches)
: [null, null, undefined];
if (hasOwnProperty(matches, "a") && hasOwnProperty(matches, "H")) {
throw new ConflictingSpecificationError(
"Can't include meridiem when specifying 24-hour format"
);
/**
 * Match `input` against the precompiled regex and describe the outcome.
 *
 * When the parser is invalid, returns `{ input, tokens, invalidReason }`
 * without attempting a match.
 *
 * @param {string} input - the text to parse
 * @returns {Object} explanation object (tokens, regex, matches, result, zone, specificOffset)
 * @throws {ConflictingSpecificationError} when the matches contain both "a" and "H"
 */
explainFromTokens(input) {
if (!this.isValid) {
return { input, tokens: this.tokens, invalidReason: this.invalidReason };
} else {
const [rawMatches, matches] = match(input, this.regex, this.handlers),
[result, zone, specificOffset] = matches
? dateTimeFromMatches(matches)
: [null, null, undefined];
if (hasOwnProperty(matches, "a") && hasOwnProperty(matches, "H")) {
throw new ConflictingSpecificationError(
"Can't include meridiem when specifying 24-hour format"
);
}
return {
input,
tokens: this.tokens,
regex: this.regex,
rawMatches,
matches,
result,
zone,
specificOffset,
};
}
// NOTE(review): the next line looks like a removed line from the old
// implementation left in by the diff view — confirm.
return { input, tokens, regex, rawMatches, matches, result, zone, specificOffset };
}

/** True when no unit of the format carried an invalidReason. */
get isValid() {
return !this.disqualifyingUnit;
}

/** The disqualifying unit's invalidReason, or null when the parser is valid. */
get invalidReason() {
return this.disqualifyingUnit ? this.disqualifyingUnit.invalidReason : null;
}
}

/**
 * One-shot convenience wrapper: builds a TokenParser for `locale` and
 * `format`, then explains how `input` parses against it.
 *
 * @param {Locale} locale - locale to build the parser for
 * @param {string} input - the text to parse
 * @param {string} format - the format string
 * @returns {Object} the result of TokenParser#explainFromTokens
 */
export function explainFromTokens(locale, input, format) {
  return new TokenParser(locale, format).explainFromTokens(input);
}

export function parseFromTokens(locale, input, format) {
Expand Down
25 changes: 25 additions & 0 deletions test/datetime/tokenParse.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1224,3 +1224,28 @@ test("DateTime.expandFormat respects the hour cycle when forced by the macro tok
const format = DateTime.expandFormat("T", { locale: "en-US" });
expect(format).toBe("H:m");
});

//------
// .fromFormatParser
//-------

// A prebuilt parser must produce the same DateTime as the one-shot fromFormat path.
test("DateTime.fromFormatParser behaves equivalently to DateTime.fromFormat", () => {
  const input = "1982/05/25 09:10:11.445";
  const fmt = "yyyy/MM/dd HH:mm:ss.SSS";
  const parser = DateTime.buildFormatParser(fmt);

  const viaFormat = DateTime.fromFormat(input, fmt);
  const viaParser = DateTime.fromFormatParser(input, parser);

  expect(viaParser).toEqual(viaFormat);
  expect(viaParser.isValid).toBe(true);
});

// A parser built for one locale must be rejected when the call asks for another.
test("DateTime.fromFormatParser throws error when used with a different locale than it was created with", () => {
  const parser = DateTime.buildFormatParser("yyyy/MM/dd HH:mm:ss.SSS", { locale: "es-ES" });
  const parseWithOtherLocale = () =>
    DateTime.fromFormatParser("1982/05/25 09:10:11.445", parser, { locale: "es-MX" });

  expect(parseWithOtherLocale).toThrowError(
    "fromFormatParser called with a locale of Locale(es-MX, null, null), but the format parser was created for Locale(es-ES, null, null)"
  );
});

0 comments on commit 69032e6

Please sign in to comment.