From d886b3aa5efce00f1929cb6c94ae9b69c4f8f8a7 Mon Sep 17 00:00:00 2001 From: Iban Eguia Moraza Date: Wed, 8 Jun 2022 20:37:15 +0200 Subject: [PATCH] Integrate ICU4X into `Intl` module (#2083) This Pull Request integrates an `ICU4X` data provider API in our codebase, to make use of the internationalization APIs that this crate provides. It changes the following: - Creates an API for pluggable icu data providers at `Context` creation, adding an `Icu` struct to store the provider (and some other internationalization tools) at runtime. - Slightly changes locale related functions to preserve the `Locale` type and ensure correctness. (Will make some other changes related to this). - Integrates the `sys_locale` crate to fetch the current default locale of a user instead of always returning `en-US`. --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- .github/ISSUE_TEMPLATE/feature_request.md | 2 +- .github/workflows/bors.yml | 2 +- .github/workflows/master.yml | 2 +- .github/workflows/rust.yml | 8 +- Cargo.lock | 171 +++++----- boa_cli/Cargo.toml | 2 +- boa_engine/Cargo.toml | 19 +- boa_engine/src/builtins/intl/mod.rs | 311 +++++++++++------- boa_engine/src/builtins/intl/tests.rs | 75 ++++- boa_engine/src/builtins/map/map_iterator.rs | 2 +- boa_engine/src/builtins/map/mod.rs | 2 +- boa_engine/src/builtins/mod.rs | 14 +- boa_engine/src/builtins/set/set_iterator.rs | 2 +- boa_engine/src/context/icu.rs | 79 +++++ boa_engine/src/context/mod.rs | 137 ++++++-- boa_engine/src/lib.rs | 6 +- boa_engine/src/object/jsproxy.rs | 1 + boa_engine/src/object/mod.rs | 7 +- boa_engine/src/syntax/ast/keyword.rs | 4 +- boa_engine/src/syntax/lexer/regex.rs | 2 +- .../hoistable/async_function_decl/mod.rs | 2 +- boa_engine/src/syntax/parser/tests.rs | 5 +- boa_gc/Cargo.toml | 2 +- boa_interner/Cargo.toml | 2 +- boa_profiler/Cargo.toml | 2 +- boa_tester/Cargo.toml | 4 +- boa_unicode/Cargo.toml | 2 +- boa_wasm/Cargo.toml | 2 +- 29 files changed, 580 insertions(+), 291 deletions(-) create mode 100644
boa_engine/src/context/icu.rs diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 9773988656b..6e249842494 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -34,7 +34,7 @@ Explain what you expected to happen, and what is happening instead. **Build environment (please complete the following information):** diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index a78986bab9c..7687b7befe2 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -16,7 +16,7 @@ Explain the ECMAScript feature that you'd like to see implemented. **Example code** diff --git a/.github/workflows/bors.yml b/.github/workflows/bors.yml index 392ac1f7e08..af3785231ad 100644 --- a/.github/workflows/bors.yml +++ b/.github/workflows/bors.yml @@ -159,4 +159,4 @@ jobs: uses: actions-rs/cargo@v1 with: command: doc - args: -v --document-private-items + args: -v --document-private-items --all-features diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6a4634db0cf..ed1605e7987 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -29,7 +29,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: doc - args: -v --document-private-items + args: -v --document-private-items --all-features - run: echo "" > target/doc/index.html - run: | if [ -d target/doc_upload ]; then rm -rf target/doc_upload; fi diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c8b6efbb9fe..e7c170dff0d 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -30,7 +30,7 @@ jobs: - name: Run cargo-tarpaulin uses: actions-rs/tarpaulin@v0.1 with: - args: --ignore-tests + args: --features intl --ignore-tests - name: Upload to codecov.io uses: codecov/codecov-action@v3 @@ -55,7 +55,7 @@ jobs: - uses: actions-rs/cargo@v1 with: command: test - args: -v + args: -v 
--features intl test_on_macos: name: Tests on MacOS @@ -70,7 +70,7 @@ jobs: - uses: actions-rs/cargo@v1 with: command: test - args: -v + args: -v --features intl fmt: name: Rustfmt @@ -163,4 +163,4 @@ jobs: uses: actions-rs/cargo@v1 with: command: doc - args: -v --document-private-items + args: -v --document-private-items --all-features diff --git a/Cargo.lock b/Cargo.lock index efd2a256017..af1c408efe3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,7 +100,12 @@ dependencies = [ "fast-float", "float-cmp", "gc", - "icu", + "icu_datetime", + "icu_locale_canonicalizer", + "icu_locid", + "icu_plurals", + "icu_provider", + "icu_testdata", "indexmap", "jemallocator", "num-bigint", @@ -114,6 +119,7 @@ dependencies = [ "ryu-js", "serde", "serde_json", + "sys-locale", "tap", "unicode-normalization", ] @@ -197,7 +203,7 @@ checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" dependencies = [ "lazy_static", "memchr", - "regex-automata 0.1.10", + "regex-automata", "serde", ] @@ -404,6 +410,16 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "cstr_core" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "644828c273c063ab0d39486ba42a5d1f3a499d35529c759e763a9c6cb8a0fb08" +dependencies = [ + "cty", + "memchr", +] + [[package]] name = "csv" version = "1.1.6" @@ -426,6 +442,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "cty" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" + [[package]] name = "dirs-next" version = "2.0.0" @@ -633,24 +655,6 @@ dependencies = [ "libc", ] -[[package]] -name = "icu" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15a4e90a2faa6719f4b3b1dac871d1f2794474d453b8e6ca1264062c4bf2c9da" -dependencies = [ - "fixed_decimal", - "icu_calendar", - "icu_datetime", - "icu_decimal", - "icu_list", - 
"icu_locale_canonicalizer", - "icu_locid", - "icu_plurals", - "icu_properties", - "writeable", -] - [[package]] name = "icu_calendar" version = "0.6.0" @@ -660,23 +664,11 @@ dependencies = [ "displaydoc", "icu_locid", "icu_provider", + "serde", "tinystr", "zerovec", ] -[[package]] -name = "icu_codepointtrie" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cdb6b96093158ec0031f9831283085cf897cf4bffc9a1a35a8360a777141058" -dependencies = [ - "displaydoc", - "icu_uniset", - "yoke", - "zerofrom", - "zerovec", -] - [[package]] name = "icu_datetime" version = "0.6.0" @@ -690,39 +682,13 @@ dependencies = [ "icu_plurals", "icu_provider", "litemap", + "serde", "smallvec", "tinystr", "writeable", "zerovec", ] -[[package]] -name = "icu_decimal" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614ff51266354e8c8d75bfc65f806fbc0d0177da079c2482402cfc20b72de47d" -dependencies = [ - "displaydoc", - "fixed_decimal", - "icu_locid", - "icu_provider", - "writeable", -] - -[[package]] -name = "icu_list" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c144d074b8de0f6adcb6941ac4544abf83483fa5681154dcc361253c3a122c5c" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider", - "regex-automata 0.2.0", - "writeable", - "zerovec", -] - [[package]] name = "icu_locale_canonicalizer" version = "0.6.0" @@ -732,6 +698,7 @@ dependencies = [ "icu_locid", "icu_provider", "litemap", + "serde", "tinystr", "zerovec", ] @@ -760,35 +727,39 @@ dependencies = [ "fixed_decimal", "icu_locid", "icu_provider", + "serde", "zerovec", ] [[package]] -name = "icu_properties" +name = "icu_provider" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97ca8f26685a463ff47dc0d9f7b270e8d955d3c2dd9f748292af14940b659671" +checksum = "c7fbd7ffd479fdbbc366334a82821dc50d9f80b758389393374e9b36ff159f1a" dependencies = [ 
"displaydoc", - "icu_codepointtrie", - "icu_provider", - "icu_uniset", + "icu_locid", + "icu_provider_macros", + "litemap", + "postcard", + "serde", + "writeable", + "yoke", + "zerofrom", "zerovec", ] [[package]] -name = "icu_provider" +name = "icu_provider_blob" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fbd7ffd479fdbbc366334a82821dc50d9f80b758389393374e9b36ff159f1a" +checksum = "474b884a565f7ec52a26754a8b57646c128195e7af629caa52317ef6674e3e0d" dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider_macros", - "litemap", + "icu_provider", + "postcard", + "serde", "writeable", "yoke", - "zerofrom", "zerovec", ] @@ -804,16 +775,13 @@ dependencies = [ ] [[package]] -name = "icu_uniset" -version = "0.5.0" +name = "icu_testdata" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecdc2859b6efd75ae22e6350e62f21c87cfe3cfdc11bef6f9565995e88d14ba9" +checksum = "a5580eeaa6ea70b94f286120ffcfb70f75ac8d759d95ccf6223a3c479ff99285" dependencies = [ - "displaydoc", - "tinystr", - "yoke", - "zerofrom", - "zerovec", + "icu_provider", + "icu_provider_blob", ] [[package]] @@ -922,6 +890,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78d268a51abaaee3b8686e56396eb725b0da510bddd266a52e784aa1029dae73" dependencies = [ + "serde", "yoke", ] @@ -1185,6 +1154,22 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "postcard" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a25c0b0ae06fcffe600ad392aabfa535696c8973f2253d9ac83171924c58a858" +dependencies = [ + "postcard-cobs", + "serde", +] + +[[package]] +name = "postcard-cobs" +version = "0.1.5-pre" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c68cb38ed13fd7bc9dd5db8f165b7c8d9c1a315104083a2b10f11354c2af97f" + [[package]] name = "ppv-lite86" version = "0.2.16" @@ -1346,15 +1331,6 @@ version = 
"0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -[[package]] -name = "regex-automata" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782" -dependencies = [ - "memchr", -] - [[package]] name = "regex-syntax" version = "0.6.26" @@ -1530,6 +1506,9 @@ name = "smallvec" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +dependencies = [ + "serde", +] [[package]] name = "stable_deref_trait" @@ -1619,6 +1598,19 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "sys-locale" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3913c5a3d30054d7f77cf07cdd800c8103ace15c6e44437c5db66a43dd3a92cf" +dependencies = [ + "cc", + "cstr_core", + "libc", + "web-sys", + "winapi", +] + [[package]] name = "tap" version = "1.0.1" @@ -1994,6 +1986,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c1b475ff48237bf7281cfa1721a52f0ad7f95ede1a46385e555870a354afc45" dependencies = [ + "serde", "yoke", "zerofrom", "zerovec-derive", diff --git a/boa_cli/Cargo.toml b/boa_cli/Cargo.toml index b36c57fe417..e3404680e42 100644 --- a/boa_cli/Cargo.toml +++ b/boa_cli/Cargo.toml @@ -2,7 +2,7 @@ name = "boa_cli" version = "0.14.0" edition = "2021" -rust-version = "1.58" +rust-version = "1.60" authors = ["boa-dev"] description = "Boa is a Javascript lexer, parser and Just-in-Time compiler written in Rust. Currently, it has support for some of the language." 
repository = "https://github.com/boa-dev/boa" diff --git a/boa_engine/Cargo.toml b/boa_engine/Cargo.toml index 87e866724ce..70306fc52e6 100644 --- a/boa_engine/Cargo.toml +++ b/boa_engine/Cargo.toml @@ -2,7 +2,7 @@ name = "boa_engine" version = "0.14.0" edition = "2021" -rust-version = "1.58" +rust-version = "1.60" authors = ["boa-dev"] description = "Boa is a Javascript lexer, parser and Just-in-Time compiler written in Rust. Currently, it has support for some of the language." repository = "https://github.com/boa-dev/boa" @@ -14,6 +14,15 @@ readme = "../README.md" [features] profiler = ["boa_profiler/profiler"] deser = ["boa_interner/serde"] +intl = [ + "dep:icu_locale_canonicalizer", + "dep:icu_locid", + "dep:icu_datetime", + "dep:icu_plurals", + "dep:icu_provider", + "dep:icu_testdata", + "dep:sys-locale" +] # Enable Boa's WHATWG console object implementation. console = [] @@ -42,7 +51,13 @@ dyn-clone = "1.0.5" once_cell = "1.12.0" queues = "1.0.2" tap = "1.0.1" -icu = "0.6.0" +icu_locale_canonicalizer = { version = "0.6.0", features = ["serde"], optional = true } +icu_locid = { version = "0.6.0", features = ["serde"], optional = true } +icu_datetime = { version = "0.6.0", features = ["serde"], optional = true } +icu_plurals = { version = "0.6.0", features = ["serde"], optional = true } +icu_provider = { version = "0.6.0", optional = true } +icu_testdata = {version = "0.6.0", optional = true} +sys-locale = { version = "0.2.0", optional = true } [dev-dependencies] criterion = "0.3.5" diff --git a/boa_engine/src/builtins/intl/mod.rs b/boa_engine/src/builtins/intl/mod.rs index 08c7264edcd..d4a23ee12c3 100644 --- a/boa_engine/src/builtins/intl/mod.rs +++ b/boa_engine/src/builtins/intl/mod.rs @@ -21,11 +21,11 @@ pub mod date_time_format; mod tests; use boa_profiler::Profiler; +use icu_locale_canonicalizer::LocaleCanonicalizer; +use icu_locid::{locale, Locale}; use indexmap::IndexSet; use rustc_hash::FxHashMap; -use tap::{Conv, Pipe}; - -use icu::locid::Locale; +use 
tap::{Conv, Pipe, TapOptional}; /// JavaScript `Intl` object. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -58,75 +58,8 @@ impl BuiltIn for Intl { } impl Intl { - fn canonicalize_locale(locale: &str) -> JsString { - JsString::new(locale) - } - - fn canonicalize_locale_list( - args: &[JsValue], - context: &mut Context, - ) -> JsResult> { - // https://tc39.es/ecma402/#sec-canonicalizelocalelist - // 1. If locales is undefined, then - let locales = args.get_or_undefined(0); - if locales.is_undefined() { - // a. Return a new empty List. - return Ok(Vec::new()); - } - - let locales = &args[0]; - - // 2. Let seen be a new empty List. - let mut seen = IndexSet::new(); - - // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot, then - // TODO: check if Type(locales) is object and handle the internal slots - let o = if locales.is_string() { - // a. Let O be CreateArrayFromList(« locales »). - Array::create_array_from_list([locales.clone()], context) - } else { - // 4. Else, - // a. Let O be ? ToObject(locales). - locales.to_object(context)? - }; - - // 5. Let len be ? ToLength(? Get(O, "length")). - let len = o.length_of_array_like(context)?; - - // 6 Let k be 0. - // 7. Repeat, while k < len, - for k in 0..len { - // a. Let Pk be ToString(k). - // b. Let kPresent be ? HasProperty(O, Pk). - let k_present = o.has_property(k, context)?; - // c. If kPresent is true, then - if k_present { - // i. Let kValue be ? Get(O, Pk). - let k_value = o.get(k, context)?; - // ii. If Type(kValue) is not String or Object, throw a TypeError exception. - if !(k_value.is_object() || k_value.is_string()) { - return context.throw_type_error("locale should be a String or Object"); - } - // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then - // TODO: handle checks for InitializedLocale internal slot (there should be an if statement here) - // 1. 
Let tag be kValue.[[Locale]]. - // iv. Else, - // 1. Let tag be ? ToString(kValue). - let tag = k_value.to_string(context)?; - // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. - // TODO: implement `IsStructurallyValidLanguageTag` - - // vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). - seen.insert(Self::canonicalize_locale(&tag)); - // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen. - } - // d. Increase k by 1. - } - - // 8. Return seen. - Ok(seen.into_iter().collect::>()) - } - + /// `Intl.getCanonicalLocales ( locales )` + /// /// Returns an array containing the canonical locale names. /// /// More information: @@ -141,10 +74,11 @@ impl Intl { context: &mut Context, ) -> JsResult { // 1. Let ll be ? CanonicalizeLocaleList(locales). - let ll = Self::canonicalize_locale_list(args, context)?; + let ll = canonicalize_locale_list(args, context)?; + // 2. Return CreateArrayFromList(ll). Ok(JsValue::Object(Array::create_array_from_list( - ll.into_iter().map(Into::into), + ll.into_iter().map(|loc| loc.to_string().into()), context, ))) } @@ -159,24 +93,29 @@ struct MatcherRecord { extension: JsString, } -/// The `DefaultLocale` abstract operation returns a String value representing the structurally -/// valid and canonicalized Unicode BCP 47 locale identifier for the host environment's current -/// locale. +/// Abstract operation `DefaultLocale ( )` +/// +/// Returns a String value representing the structurally valid and canonicalized +/// Unicode BCP 47 locale identifier for the host environment's current locale. 
/// /// More information: /// - [ECMAScript reference][spec] /// /// [spec]: https://tc39.es/ecma402/#sec-defaultlocale -fn default_locale() -> JsString { - // FIXME get locale from environment - JsString::new("en-US") +fn default_locale(canonicalizer: &LocaleCanonicalizer) -> Locale { + sys_locale::get_locale() + .and_then(|loc| loc.parse::<Locale>().ok()) + .tap_some_mut(|loc| canonicalize_unicode_locale_id(loc, canonicalizer)) + .unwrap_or(locale!("en-US")) } -/// The `BestAvailableLocale` abstract operation compares the provided argument `locale`, -/// which must be a String value with a structurally valid and canonicalized Unicode BCP 47 -/// locale identifier, against the locales in `availableLocales` and returns either the longest -/// non-empty prefix of `locale` that is an element of `availableLocales`, or undefined if -/// there is no such element. +/// Abstract operation `BestAvailableLocale ( availableLocales, locale )` +/// +/// Compares the provided argument `locale`, which must be a String value with a +/// structurally valid and canonicalized Unicode BCP 47 locale identifier, against +/// the locales in `availableLocales` and returns either the longest non-empty prefix +/// of `locale` that is an element of `availableLocales`, or undefined if there is no +/// such element. /// /// More information: /// - [ECMAScript reference][spec] @@ -212,15 +151,21 @@ fn best_available_locale(available_locales: &[JsString], locale: &JsString) -> O } } -/// The `LookupMatcher` abstract operation compares `requestedLocales`, which must be a `List` -/// as returned by `CanonicalizeLocaleList`, against the locales in `availableLocales` and -/// determines the best available language to meet the request.
+/// Abstract operation `LookupMatcher ( availableLocales, requestedLocales )` +/// +/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`, +/// against the locales in `availableLocales` and determines the best available language to +/// meet the request. /// /// More information: /// - [ECMAScript reference][spec] /// /// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher -fn lookup_matcher(available_locales: &[JsString], requested_locales: &[JsString]) -> MatcherRecord { +fn lookup_matcher( + available_locales: &[JsString], + requested_locales: &[JsString], + canonicalizer: &LocaleCanonicalizer, +) -> MatcherRecord { // 1. Let result be a new Record. // 2. For each element locale of requestedLocales, do for locale_str in requested_locales { @@ -259,16 +204,18 @@ fn lookup_matcher(available_locales: &[JsString], requested_locales: &[JsString] // 4. Set result.[[locale]] to defLocale. // 5. Return result. MatcherRecord { - locale: default_locale(), + locale: default_locale(canonicalizer).to_string().into(), extension: JsString::empty(), } } -/// The `BestFitMatcher` abstract operation compares `requestedLocales`, which must be a `List` -/// as returned by `CanonicalizeLocaleList`, against the locales in `availableLocales` and -/// determines the best available language to meet the request. The algorithm is implementation -/// dependent, but should produce results that a typical user of the requested locales would -/// perceive as at least as good as those produced by the `LookupMatcher` abstract operation. +/// Abstract operation `BestFitMatcher ( availableLocales, requestedLocales )` +/// +/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`, +/// against the locales in `availableLocales` and determines the best available language to +/// meet the request. 
The algorithm is implementation dependent, but should produce results +/// that a typical user of the requested locales would perceive as at least as good as those +/// produced by the `LookupMatcher` abstract operation. /// /// More information: /// - [ECMAScript reference][spec] @@ -277,8 +224,9 @@ fn lookup_matcher(available_locales: &[JsString], requested_locales: &[JsString] fn best_fit_matcher( available_locales: &[JsString], requested_locales: &[JsString], + canonicalizer: &LocaleCanonicalizer, ) -> MatcherRecord { - lookup_matcher(available_locales, requested_locales) + lookup_matcher(available_locales, requested_locales, canonicalizer) } /// `Keyword` structure is a pair of keyword key and keyword value. @@ -302,9 +250,10 @@ struct UniExtRecord { keywords: Vec, } -/// The `UnicodeExtensionComponents` abstract operation returns the attributes and keywords from -/// `extension`, which must be a String value whose contents are a `Unicode locale extension` -/// sequence. +/// Abstract operation `UnicodeExtensionComponents ( extension )` +/// +/// Returns the attributes and keywords from `extension`, which must be a String +/// value whose contents are a `Unicode locale extension` sequence. /// /// More information: /// - [ECMAScript reference][spec] @@ -406,15 +355,21 @@ fn unicode_extension_components(extension: &JsString) -> UniExtRecord { } } -/// The `InsertUnicodeExtensionAndCanonicalize` abstract operation inserts `extension`, which must -/// be a Unicode locale extension sequence, into `locale`, which must be a String value with a -/// structurally valid and canonicalized Unicode BCP 47 locale identifier. +/// Abstract operation `InsertUnicodeExtensionAndCanonicalize ( locale, extension )` +/// +/// Inserts `extension`, which must be a Unicode locale extension sequence, into +/// `locale`, which must be a String value with a structurally valid and canonicalized +/// Unicode BCP 47 locale identifier. 
/// /// More information: /// - [ECMAScript reference][spec] /// /// [spec]: https://tc39.es/ecma402/#sec-insert-unicode-extension-and-canonicalize -fn insert_unicode_extension_and_canonicalize(locale: &str, extension: &str) -> JsString { +fn insert_unicode_extension_and_canonicalize( + locale: &str, + extension: &str, + canonicalizer: &LocaleCanonicalizer, +) -> JsString { // TODO 1. Assert: locale does not contain a substring that is a Unicode locale extension sequence. // TODO 2. Assert: extension is a Unicode locale extension sequence. // TODO 3. Assert: tag matches the unicode_locale_id production. @@ -442,9 +397,91 @@ fn insert_unicode_extension_and_canonicalize(locale: &str, extension: &str) -> J } }; - // TODO 7. Assert: ! IsStructurallyValidLanguageTag(locale) is true. + // 7. Assert: ! IsStructurallyValidLanguageTag(locale) is true. + let mut new_locale = new_locale + .parse() + .expect("Assert: ! IsStructurallyValidLanguageTag(locale) is true."); + // 8. Return ! CanonicalizeUnicodeLocaleId(locale). - Intl::canonicalize_locale(&new_locale) + canonicalize_unicode_locale_id(&mut new_locale, canonicalizer); + new_locale.to_string().into() +} + +/// Abstract operation `CanonicalizeLocaleList ( locales )` +/// +/// Converts an array of [`JsValue`]s containing structurally valid +/// [Unicode BCP 47 locale identifiers][bcp-47] into their [canonical form][canon]. +/// +/// For efficiency, this returns a [`Vec`] of [`Locale`]s instead of a [`Vec`] of +/// [`String`]s, since [`Locale`] allows us to modify individual parts of the locale +/// without scanning the whole string again. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-canonicalizelocalelist +/// [bcp-47]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +/// [canon]: https://unicode.org/reports/tr35/#LocaleId_Canonicalization +fn canonicalize_locale_list(args: &[JsValue], context: &mut Context) -> JsResult<Vec<Locale>> { + // 1.
If locales is undefined, then + let locales = args.get_or_undefined(0); + if locales.is_undefined() { + // a. Return a new empty List. + return Ok(Vec::new()); + } + + // 2. Let seen be a new empty List. + let mut seen = IndexSet::new(); + + // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot, then + // TODO: check if Type(locales) is object and handle the internal slots + let o = if locales.is_string() { + // a. Let O be CreateArrayFromList(« locales »). + Array::create_array_from_list([locales.clone()], context) + } else { + // 4. Else, + // a. Let O be ? ToObject(locales). + locales.to_object(context)? + }; + + // 5. Let len be ? ToLength(? Get(O, "length")). + let len = o.length_of_array_like(context)?; + + // 6 Let k be 0. + // 7. Repeat, while k < len, + for k in 0..len { + // a. Let Pk be ToString(k). + // b. Let kPresent be ? HasProperty(O, Pk). + let k_present = o.has_property(k, context)?; + // c. If kPresent is true, then + if k_present { + // i. Let kValue be ? Get(O, Pk). + let k_value = o.get(k, context)?; + // ii. If Type(kValue) is not String or Object, throw a TypeError exception. + if !(k_value.is_object() || k_value.is_string()) { + return context.throw_type_error("locale should be a String or Object"); + } + // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then + // TODO: handle checks for InitializedLocale internal slot (there should be an if statement here) + // 1. Let tag be kValue.[[Locale]]. + // iv. Else, + // 1. Let tag be ? ToString(kValue). + let tag = k_value.to_string(context)?; + // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + let mut tag = tag.parse().map_err(|_| { + context.construct_range_error("locale is not a structurally valid language tag") + })?; + + // vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). 
+ canonicalize_unicode_locale_id(&mut tag, &*context.icu().locale_canonicalizer()); + seen.insert(tag); + // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen. + } + // d. Increase k by 1. + } + + // 8. Return seen. + Ok(seen.into_iter().collect()) } /// `LocaleDataRecord` is the type of `locale_data` argument in `resolve_locale` subroutine. @@ -473,10 +510,12 @@ struct ResolveLocaleRecord { pub(crate) data_locale: JsString, } -/// The `ResolveLocale` abstract operation compares a BCP 47 language priority list -/// `requestedLocales` against the locales in `availableLocales` and determines the best -/// available language to meet the request. `availableLocales`, `requestedLocales`, and -/// `relevantExtensionKeys` must be provided as `List` values, options and `localeData` as Records. +/// Abstract operation `ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData )` +/// +/// Compares a BCP 47 language priority list `requestedLocales` against the locales +/// in `availableLocales` and determines the best available language to meet the request. +/// `availableLocales`, `requestedLocales`, and `relevantExtensionKeys` must be provided as +/// `List` values, options and `localeData` as Records. /// /// More information: /// - [ECMAScript reference][spec] @@ -498,9 +537,17 @@ fn resolve_locale( // 3. Else, // a. Let r be ! BestFitMatcher(availableLocales, requestedLocales). let r = if matcher.eq(&JsString::new("lookup")) { - lookup_matcher(available_locales, requested_locales) + lookup_matcher( + available_locales, + requested_locales, + context.icu().locale_canonicalizer(), + ) } else { - best_fit_matcher(available_locales, requested_locales) + best_fit_matcher( + available_locales, + requested_locales, + context.icu().locale_canonicalizer(), + ) }; // 4. Let foundLocale be r.[[locale]]. @@ -643,8 +690,11 @@ fn resolve_locale( // 10. 
If the number of elements in supportedExtension is greater than 2, then if supported_extension.len() > 2 { // a. Let foundLocale be InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension). - found_locale = - insert_unicode_extension_and_canonicalize(&found_locale, &supported_extension); + found_locale = insert_unicode_extension_and_canonicalize( + &found_locale, + &supported_extension, + context.icu().locale_canonicalizer(), + ); } // 11. Set result.[[locale]] to foundLocale. @@ -660,9 +710,11 @@ pub(crate) enum GetOptionType { Boolean, } -/// The abstract operation `GetOption` extracts the value of the property named `property` from the -/// provided `options` object, converts it to the required `type`, checks whether it is one of a -/// `List` of allowed `values`, and fills in a `fallback` value if necessary. If `values` is +/// Abstract operation `GetOption ( options, property, type, values, fallback )` +/// +/// Extracts the value of the property named `property` from the provided `options` object, +/// converts it to the required `type`, checks whether it is one of a `List` of allowed +/// `values`, and fills in a `fallback` value if necessary. If `values` is /// undefined, there is no fixed set of values and any is permitted. /// /// More information: @@ -709,9 +761,11 @@ pub(crate) fn get_option( Ok(value) } -/// The abstract operation `GetNumberOption` extracts the value of the property named `property` -/// from the provided `options` object, converts it to a `Number value`, checks whether it is in -/// the allowed range, and fills in a `fallback` value if necessary. +/// Abstract operation `GetNumberOption ( options, property, minimum, maximum, fallback )` +/// +/// Extracts the value of the property named `property` from the provided `options` +/// object, converts it to a `Number value`, checks whether it is in the allowed range, +/// and fills in a `fallback` value if necessary. 
/// /// More information: /// - [ECMAScript reference][spec] @@ -734,8 +788,10 @@ pub(crate) fn get_number_option( default_number_option(&value, minimum, maximum, fallback, context) } -/// The abstract operation `DefaultNumberOption` converts `value` to a `Number value`, checks -/// whether it is in the allowed range, and fills in a `fallback` value if necessary. +/// Abstract operation `DefaultNumberOption ( value, minimum, maximum, fallback )` +/// +/// Converts `value` to a `Number value`, checks whether it is in the allowed range, +/// and fills in a `fallback` value if necessary. /// /// More information: /// - [ECMAScript reference][spec] @@ -765,3 +821,16 @@ pub(crate) fn default_number_option( // 4. Return floor(value). Ok(Some(value.floor())) } + +/// Abstract operation `CanonicalizeUnicodeLocaleId ( locale )`. +/// +/// This function differs slightly from the specification by modifying in-place +/// the provided [`Locale`] instead of creating a new canonicalized copy. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid +fn canonicalize_unicode_locale_id(locale: &mut Locale, canonicalizer: &LocaleCanonicalizer) { + canonicalizer.canonicalize(locale); +} diff --git a/boa_engine/src/builtins/intl/tests.rs b/boa_engine/src/builtins/intl/tests.rs index 8cf08b43d33..6abfb0c6002 100644 --- a/boa_engine/src/builtins/intl/tests.rs +++ b/boa_engine/src/builtins/intl/tests.rs @@ -9,6 +9,7 @@ use crate::{ Context, JsString, JsValue, }; +use icu_locale_canonicalizer::LocaleCanonicalizer; use rustc_hash::FxHashMap; #[test] @@ -47,27 +48,36 @@ fn best_avail_loc() { #[test] fn lookup_match() { + let provider = icu_testdata::get_provider(); + let canonicalizer = + LocaleCanonicalizer::new(&provider).expect("Could not create canonicalizer"); // available: [], requested: [] let available_locales = Vec::<JsString>::new(); let requested_locales = Vec::<JsString>::new(); - let matcher = 
lookup_matcher(&available_locales, &requested_locales); - assert_eq!(matcher.locale, default_locale()); + let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer); + assert_eq!( + matcher.locale, + default_locale(&canonicalizer).to_string().as_str() + ); assert_eq!(matcher.extension, ""); // available: [de-DE], requested: [] let available_locales = vec![JsString::new("de-DE")]; let requested_locales = Vec::<JsString>::new(); - let matcher = lookup_matcher(&available_locales, &requested_locales); - assert_eq!(matcher.locale, default_locale()); + let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer); + assert_eq!( + matcher.locale, + default_locale(&canonicalizer).to_string().as_str() + ); assert_eq!(matcher.extension, ""); // available: [fr-FR], requested: [fr-FR-u-hc-h12] let available_locales = vec![JsString::new("fr-FR")]; let requested_locales = vec![JsString::new("fr-FR-u-hc-h12")]; - let matcher = lookup_matcher(&available_locales, &requested_locales); + let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer); assert_eq!(matcher.locale, "fr-FR"); assert_eq!(matcher.extension, "u-hc-h12"); @@ -75,32 +85,35 @@ fn lookup_match() { let available_locales = vec![JsString::new("es-ES")]; let requested_locales = vec![JsString::new("es-ES")]; - let matcher = best_fit_matcher(&available_locales, &requested_locales); + let matcher = best_fit_matcher(&available_locales, &requested_locales, &canonicalizer); assert_eq!(matcher.locale, "es-ES"); assert_eq!(matcher.extension, ""); } #[test] fn insert_unicode_ext() { + let provider = icu_testdata::get_provider(); + let canonicalizer = + LocaleCanonicalizer::new(&provider).expect("Could not create canonicalizer"); let locale = JsString::new("hu-HU"); let ext = JsString::empty(); assert_eq!( - insert_unicode_extension_and_canonicalize(&locale, &ext), + insert_unicode_extension_and_canonicalize(&locale, &ext, &canonicalizer), locale ); let locale = 
JsString::new("hu-HU"); let ext = JsString::new("-u-hc-h12"); assert_eq!( - insert_unicode_extension_and_canonicalize(&locale, &ext), + insert_unicode_extension_and_canonicalize(&locale, &ext, &canonicalizer), JsString::new("hu-HU-u-hc-h12") ); let locale = JsString::new("hu-HU-x-PRIVATE"); let ext = JsString::new("-u-hc-h12"); assert_eq!( - insert_unicode_extension_and_canonicalize(&locale, &ext), - JsString::new("hu-HU-u-hc-h12-x-PRIVATE") + insert_unicode_extension_and_canonicalize(&locale, &ext, &canonicalizer), + JsString::new("hu-HU-u-hc-h12-x-private") ); } @@ -165,8 +178,18 @@ fn locale_resolution() { &locale_data, &mut context, ); - assert_eq!(locale_record.locale, default_locale()); - assert_eq!(locale_record.data_locale, default_locale()); + assert_eq!( + locale_record.locale, + default_locale(context.icu().locale_canonicalizer()) + .to_string() + .as_str() + ); + assert_eq!( + locale_record.data_locale, + default_locale(context.icu().locale_canonicalizer()) + .to_string() + .as_str() + ); assert!(locale_record.properties.is_empty()); // test best fit @@ -187,8 +210,18 @@ fn locale_resolution() { &locale_data, &mut context, ); - assert_eq!(locale_record.locale, default_locale()); - assert_eq!(locale_record.data_locale, default_locale()); + assert_eq!( + locale_record.locale, + default_locale(context.icu().locale_canonicalizer()) + .to_string() + .as_str() + ); + assert_eq!( + locale_record.data_locale, + default_locale(context.icu().locale_canonicalizer()) + .to_string() + .as_str() + ); assert!(locale_record.properties.is_empty()); // available: [es-ES], requested: [es-ES] @@ -231,8 +264,18 @@ fn locale_resolution() { &locale_data, &mut context, ); - assert_eq!(locale_record.locale, default_locale()); - assert_eq!(locale_record.data_locale, default_locale()); + assert_eq!( + locale_record.locale, + default_locale(context.icu().locale_canonicalizer()) + .to_string() + .as_str() + ); + assert_eq!( + locale_record.data_locale, + 
default_locale(context.icu().locale_canonicalizer()) + .to_string() + .as_str() + ); assert!(locale_record.properties.is_empty()); } diff --git a/boa_engine/src/builtins/map/map_iterator.rs b/boa_engine/src/builtins/map/map_iterator.rs index 7ed7c236c34..d847beb8c81 100644 --- a/boa_engine/src/builtins/map/map_iterator.rs +++ b/boa_engine/src/builtins/map/map_iterator.rs @@ -33,7 +33,7 @@ impl MapIterator { /// More information: /// - [ECMA reference][spec] /// - /// [spec]: https://www.ecma-international.org/ecma-262/11.0/index.html#sec-createmapiterator + /// [spec]: https://tc39.es/ecma262/#sec-createmapiterator pub(crate) fn create_map_iterator( map: &JsValue, kind: PropertyNameKind, diff --git a/boa_engine/src/builtins/map/mod.rs b/boa_engine/src/builtins/map/mod.rs index 673d7f9b386..26bf2075aff 100644 --- a/boa_engine/src/builtins/map/mod.rs +++ b/boa_engine/src/builtins/map/mod.rs @@ -170,7 +170,7 @@ impl Map { /// - [ECMAScript reference][spec] /// - [MDN documentation][mdn] /// - /// [spec]: https://www.ecma-international.org/ecma-262/11.0/index.html#sec-map.prototype.entries + /// [spec]: https://tc39.es/ecma262/#sec-map.prototype.entries /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries pub(crate) fn entries( this: &JsValue, diff --git a/boa_engine/src/builtins/mod.rs b/boa_engine/src/builtins/mod.rs index 11ae1e56804..2d4bcd5596c 100644 --- a/boa_engine/src/builtins/mod.rs +++ b/boa_engine/src/builtins/mod.rs @@ -4,8 +4,6 @@ pub mod array; pub mod array_buffer; pub mod bigint; pub mod boolean; -#[cfg(feature = "console")] -pub mod console; pub mod dataview; pub mod date; pub mod error; @@ -15,7 +13,6 @@ pub mod generator; pub mod generator_function; pub mod global_this; pub mod infinity; -pub mod intl; pub mod iterable; pub mod json; pub mod map; @@ -33,6 +30,12 @@ pub mod symbol; pub mod typed_array; pub mod undefined; +#[cfg(feature = "console")] +pub mod console; + +#[cfg(feature = "intl")] +pub 
mod intl; + pub(crate) use self::{ array::{array_iterator::ArrayIterator, Array}, bigint::BigInt, @@ -47,7 +50,6 @@ pub(crate) use self::{ function::BuiltInFunctionObject, global_this::GlobalThis, infinity::Infinity, - intl::Intl, json::Json, map::map_iterator::MapIterator, map::Map, @@ -145,7 +147,6 @@ pub fn init(context: &mut Context) { BuiltInFunctionObject, BuiltInObjectObject, Math, - Intl, Json, Array, Proxy, @@ -187,6 +188,9 @@ pub fn init(context: &mut Context) { Promise }; + #[cfg(feature = "intl")] + init_builtin::(context); + #[cfg(feature = "console")] init_builtin::(context); } diff --git a/boa_engine/src/builtins/set/set_iterator.rs b/boa_engine/src/builtins/set/set_iterator.rs index 07b2f5e2c72..af27d13e306 100644 --- a/boa_engine/src/builtins/set/set_iterator.rs +++ b/boa_engine/src/builtins/set/set_iterator.rs @@ -40,7 +40,7 @@ impl SetIterator { /// More information: /// - [ECMA reference][spec] /// - /// [spec]: https://www.ecma-international.org/ecma-262/11.0/index.html#sec-createsetiterator + /// [spec]: https://tc39.es/ecma262/#sec-createsetiterator pub(crate) fn create_set_iterator( set: JsValue, kind: PropertyNameKind, diff --git a/boa_engine/src/context/icu.rs b/boa_engine/src/context/icu.rs new file mode 100644 index 00000000000..880d9d01916 --- /dev/null +++ b/boa_engine/src/context/icu.rs @@ -0,0 +1,79 @@ +use icu_datetime::provider::{ + calendar::{DatePatternsV1Marker, DateSkeletonPatternsV1Marker, DateSymbolsV1Marker}, + week_data::WeekDataV1Marker, +}; +use icu_locale_canonicalizer::{ + provider::{AliasesV1Marker, LikelySubtagsV1Marker}, + LocaleCanonicalizer, +}; +use icu_plurals::provider::OrdinalV1Marker; +use icu_provider::prelude::*; + +/// Trait encompassing all the required implementations that define +/// a valid icu data provider. 
+pub trait BoaProvider: + ResourceProvider<AliasesV1Marker> + + ResourceProvider<LikelySubtagsV1Marker> + + ResourceProvider<DateSymbolsV1Marker> + + ResourceProvider<DatePatternsV1Marker> + + ResourceProvider<DateSkeletonPatternsV1Marker> + + ResourceProvider<OrdinalV1Marker> + + ResourceProvider<WeekDataV1Marker> +{ +} + +impl<T> BoaProvider for T where + T: ResourceProvider<AliasesV1Marker> + + ResourceProvider<LikelySubtagsV1Marker> + + ResourceProvider<DateSymbolsV1Marker> + + ResourceProvider<DatePatternsV1Marker> + + ResourceProvider<DateSkeletonPatternsV1Marker> + + ResourceProvider<OrdinalV1Marker> + + ResourceProvider<WeekDataV1Marker> + + ?Sized +{ +} + +/// Collection of tools initialized from a [`BoaProvider`] that are used +/// for the functionality of `Intl`. +#[allow(unused)] +pub(crate) struct Icu { + provider: Box<dyn BoaProvider>, + locale_canonicalizer: LocaleCanonicalizer, +} + +impl std::fmt::Debug for Icu { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + #[derive(Debug)] + struct Canonicalizer; + f.debug_struct("Icu") + .field("locale_canonicalizer", &Canonicalizer) + .finish() + } +} + +impl Icu { + /// Create a new [`Icu`] from a valid [`BoaProvider`] + /// + /// # Errors + /// + /// This method will return an error if any of the tools + /// required cannot be constructed. + pub(crate) fn new(provider: Box<dyn BoaProvider>) -> Result<Self, DataError> { + Ok(Self { + locale_canonicalizer: LocaleCanonicalizer::new(&*provider)?, + provider, + }) + } + + /// Get the [`LocaleCanonicalizer`] tool. 
+ pub(crate) fn locale_canonicalizer(&self) -> &LocaleCanonicalizer { + &self.locale_canonicalizer + } + + /// Get the inner icu data provider + #[allow(unused)] + pub(crate) fn provider(&self) -> &dyn BoaProvider { + self.provider.as_ref() + } +} diff --git a/boa_engine/src/context/mod.rs b/boa_engine/src/context/mod.rs index 08058aa9824..f3f75fd0972 100644 --- a/boa_engine/src/context/mod.rs +++ b/boa_engine/src/context/mod.rs @@ -2,8 +2,13 @@ pub mod intrinsics; +#[cfg(feature = "intl")] +mod icu; + use intrinsics::{IntrinsicObjects, Intrinsics}; +#[cfg(feature = "console")] +use crate::builtins::console::Console; use crate::{ builtins::{self, function::NativeFunctionSignature}, bytecompiler::ByteCompiler, @@ -16,13 +21,18 @@ use crate::{ vm::{CallFrame, CodeBlock, FinallyReturn, GeneratorResumeKind, Vm}, JsResult, JsValue, }; + use boa_gc::Gc; use boa_interner::{Interner, Sym}; use boa_profiler::Profiler; use queues::{queue, IsQueue, Queue}; -#[cfg(feature = "console")] -use crate::builtins::console::Console; +#[cfg(feature = "intl")] +use icu_provider::DataError; + +#[doc(inline)] +#[cfg(all(feature = "intl", doc))] +pub use icu::BoaProvider; /// Javascript context. It is the primary way to interact with the runtime. 
/// @@ -84,6 +94,10 @@ pub struct Context { /// Intrinsic objects intrinsics: Intrinsics, + /// ICU related utilities + #[cfg(feature = "intl")] + icu: icu::Icu, + pub(crate) vm: Vm, pub(crate) promise_job_queue: Queue>, @@ -91,40 +105,16 @@ pub struct Context { impl Default for Context { fn default() -> Self { - let mut context = Self { - realm: Realm::create(), - interner: Interner::default(), - #[cfg(feature = "console")] - console: Console::default(), - intrinsics: Intrinsics::default(), - vm: Vm { - frame: None, - stack: Vec::with_capacity(1024), - trace: false, - stack_size_limit: 1024, - }, - promise_job_queue: queue![], - }; - - // Add new builtIns to Context Realm - // At a later date this can be removed from here and called explicitly, - // but for now we almost always want these default builtins - context.intrinsics.objects = IntrinsicObjects::init(&mut context); - context.create_intrinsics(); - context + ContextBuilder::default().build() } } impl Context { - /// Create a new `Context`. - #[inline] - pub fn new(interner: Interner) -> Self { - Self { - interner, - ..Self::default() - } + /// Create a new [`ContextBuilder`] to specify the [`Interner`] and/or + /// the icu data provider. + pub fn builder() -> ContextBuilder { + ContextBuilder::default() } - /// Gets the string interner. #[inline] pub fn interner(&self) -> &Interner { @@ -761,4 +751,89 @@ impl Context { _ => (), } } + + #[cfg(feature = "intl")] + #[inline] + /// Get the ICU related utilities + pub(crate) fn icu(&self) -> &icu::Icu { + &self.icu + } +} + +/// Builder for the [`Context`] type. +/// +/// This builder allows custom initialization of the [`Interner`] within +/// the context. +/// Additionally, if the `intl` feature is enabled, [`ContextBuilder`] becomes +/// the only way to create a new [`Context`], since now it requires a +/// valid data provider for the `Intl` functionality. 
+/// +#[cfg_attr( + feature = "intl", + doc = "The required data in a valid provider is specified in [`BoaProvider`]" +)] +#[derive(Debug, Default)] +pub struct ContextBuilder { + interner: Option<Interner>, + #[cfg(feature = "intl")] + icu: Option<icu::Icu>, +} + +impl ContextBuilder { + /// Initializes the context [`Interner`] to the provided interner. + /// + /// This is useful when you want to initialize an [`Interner`] with + /// a collection of words before parsing. + #[must_use] + pub fn interner(mut self, interner: Interner) -> Self { + self.interner = Some(interner); + self + } + + /// Provides an icu data provider to the [`Context`]. + /// + /// This function is only available if the `intl` feature is enabled. + #[cfg(any(feature = "intl", docs))] + pub fn icu_provider(mut self, provider: Box<dyn icu::BoaProvider>) -> Result<Self, DataError> { + self.icu = Some(icu::Icu::new(provider)?); + Ok(self) + } + + /// Creates a new [`ContextBuilder`] with a default empty [`Interner`] + /// and a default [`BoaProvider`] if the `intl` feature is enabled. + pub fn new() -> Self { + Self::default() + } + + /// Builds a new [`Context`] with the provided parameters, and defaults + /// all missing parameters to their default values. 
+ pub fn build(self) -> Context { + let mut context = Context { + realm: Realm::create(), + interner: self.interner.unwrap_or_default(), + #[cfg(feature = "console")] + console: Console::default(), + intrinsics: Intrinsics::default(), + vm: Vm { + frame: None, + stack: Vec::with_capacity(1024), + trace: false, + stack_size_limit: 1024, + }, + promise_job_queue: queue![], + #[cfg(feature = "intl")] + icu: self.icu.unwrap_or_else(|| { + // TODO: Replace with a more fitting default + icu::Icu::new(Box::new(icu_testdata::get_provider())) + .expect("Failed to initialize default icu data.") + }), + }; + + // Add new builtIns to Context Realm + // At a later date this can be removed from here and called explicitly, + // but for now we almost always want these default builtins + context.intrinsics.objects = IntrinsicObjects::init(&mut context); + context.create_intrinsics(); + context + } } diff --git a/boa_engine/src/lib.rs b/boa_engine/src/lib.rs index ddd4150dc1a..626db695619 100644 --- a/boa_engine/src/lib.rs +++ b/boa_engine/src/lib.rs @@ -3,8 +3,12 @@ //! //! # Crate Features //! - **serde** - Enables serialization and deserialization of the AST (Abstract Syntax Tree). -//! - **console** - Enables `boa`s WHATWG `console` object implementation. +//! - **console** - Enables `boa`'s [WHATWG `console`][whatwg] object implementation. //! - **profiler** - Enables profiling with measureme (this is mostly internal). +//! - **intl** - Enables `boa`'s [ECMA-402 Internationalization API][ecma-402] (`Intl` object) +//! +//! [whatwg]: https://console.spec.whatwg.org +//! 
[ecma-402]: https://tc39.es/ecma402 #![doc( html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo.svg", diff --git a/boa_engine/src/object/jsproxy.rs b/boa_engine/src/object/jsproxy.rs index 5987da80ebc..ad9a65867d6 100644 --- a/boa_engine/src/object/jsproxy.rs +++ b/boa_engine/src/object/jsproxy.rs @@ -65,6 +65,7 @@ impl JsObjectType for JsProxy {} /// accessible from [`JsProxy::builder`]; with the [`JsProxyBuilder::build_revocable`] /// method. /// +/// [proxy]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Proxy /// [revocable]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Proxy/revocable #[derive(Debug, Trace, Finalize)] pub struct JsRevocableProxy { diff --git a/boa_engine/src/object/mod.rs b/boa_engine/src/object/mod.rs index 30b13938b28..de4cf8241d9 100644 --- a/boa_engine/src/object/mod.rs +++ b/boa_engine/src/object/mod.rs @@ -20,6 +20,8 @@ use self::internal_methods::{ string::STRING_EXOTIC_INTERNAL_METHODS, InternalObjectMethods, ORDINARY_INTERNAL_METHODS, }; +#[cfg(feature = "intl")] +use crate::builtins::intl::date_time_format::DateTimeFormat; use crate::{ builtins::{ array::array_iterator::ArrayIterator, @@ -29,7 +31,6 @@ use crate::{ arguments::ParameterMap, BoundFunction, Captures, Function, NativeFunctionSignature, }, generator::Generator, - intl::date_time_format::DateTimeFormat, map::map_iterator::MapIterator, map::ordered_map::OrderedMap, object::for_in_iterator::ForInIterator, @@ -45,6 +46,7 @@ use crate::{ property::{Attribute, PropertyDescriptor, PropertyKey}, Context, JsBigInt, JsResult, JsString, JsSymbol, JsValue, }; + use boa_gc::{Finalize, Trace}; use boa_interner::Sym; use rustc_hash::FxHashMap; @@ -169,6 +171,7 @@ pub enum ObjectKind { NativeObject(Box), IntegerIndexed(IntegerIndexed), Promise(Promise), + #[cfg(feature = "intl")] DateTimeFormat(Box), } @@ -436,6 +439,7 @@ impl ObjectData { } /// Create the `DateTimeFormat` object data + 
#[cfg(feature = "intl")] pub fn date_time_format(date_time_fmt: Box) -> Self { Self { kind: ObjectKind::DateTimeFormat(date_time_fmt), @@ -477,6 +481,7 @@ impl Display for ObjectKind { Self::IntegerIndexed(_) => "TypedArray", Self::DataView(_) => "DataView", Self::Promise(_) => "Promise", + #[cfg(feature = "intl")] Self::DateTimeFormat(_) => "DateTimeFormat", }) } diff --git a/boa_engine/src/syntax/ast/keyword.rs b/boa_engine/src/syntax/ast/keyword.rs index 16389db0fbd..723d0d6172d 100644 --- a/boa_engine/src/syntax/ast/keyword.rs +++ b/boa_engine/src/syntax/ast/keyword.rs @@ -4,7 +4,7 @@ //! - [ECMAScript reference][spec] //! - [MDN documentation][mdn] //! -//! [spec]: https://www.ecma-international.org/ecma-262/#sec-keywords +//! [spec]: https://tc39.es/ecma262/#sec-keywords-and-reserved-words //! [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#Keywords use crate::syntax::ast::op::{BinOp, CompOp}; @@ -22,7 +22,7 @@ use serde::{Deserialize, Serialize}; /// - [ECMAScript reference][spec] /// - [MDN documentation][mdn] /// -/// [spec]: https://www.ecma-international.org/ecma-262/#sec-keywords +/// [spec]: https://tc39.es/ecma262/#sec-keywords-and-reserved-words /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#Keywords #[cfg_attr(feature = "deser", derive(Serialize, Deserialize))] #[derive(Clone, Copy, PartialEq, Debug)] diff --git a/boa_engine/src/syntax/lexer/regex.rs b/boa_engine/src/syntax/lexer/regex.rs index de73c415a94..d18f3922143 100644 --- a/boa_engine/src/syntax/lexer/regex.rs +++ b/boa_engine/src/syntax/lexer/regex.rs @@ -23,7 +23,7 @@ use std::{ /// - [ECMAScript reference][spec] /// - [MDN documentation][mdn] /// -/// [spec]: https://www.ecma-international.org/ecma-262/#sec-literals-regular-expression-literals +/// [spec]: https://tc39.es/ecma262/#sec-literals-regular-expression-literals /// [mdn]: 
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions #[derive(Debug, Clone, Copy)] pub(super) struct RegexLiteral; diff --git a/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_function_decl/mod.rs b/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_function_decl/mod.rs index f01cd523736..951b53b653c 100644 --- a/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_function_decl/mod.rs +++ b/boa_engine/src/syntax/parser/statement/declaration/hoistable/async_function_decl/mod.rs @@ -18,7 +18,7 @@ use std::io::Read; /// - [ECMAScript specification][spec] /// /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/async_function -/// [spec]: https://www.ecma-international.org/ecma-262/11.0/index.html#prod-AsyncFunctionDeclaration +/// [spec]: https://tc39.es/ecma262/#prod-AsyncFunctionDeclaration #[derive(Debug, Clone, Copy)] pub(super) struct AsyncFunctionDeclaration { allow_yield: AllowYield, diff --git a/boa_engine/src/syntax/parser/tests.rs b/boa_engine/src/syntax/parser/tests.rs index 76bcdec1cfe..5ccc45a0cf5 100644 --- a/boa_engine/src/syntax/parser/tests.rs +++ b/boa_engine/src/syntax/parser/tests.rs @@ -2,6 +2,7 @@ use super::Parser; use crate::{ + context::ContextBuilder, syntax::ast::{ node::{ field::GetConstField, object::PropertyDefinition, ArrowFunctionDecl, Assign, BinOp, @@ -23,7 +24,7 @@ pub(super) fn check_parser(js: &str, expr: L, interner: Interner) where L: Into>, { - let mut context = Context::new(interner); + let mut context = ContextBuilder::default().interner(interner).build(); assert_eq!( Parser::new(js.as_bytes()) .parse_all(&mut context) @@ -469,7 +470,7 @@ fn hashbang_use_strict_no_with() { fn hashbang_use_strict_with_with_statement() { check_parser( r#"#!\"use strict" - + with({}) {} "#, vec![], diff --git a/boa_gc/Cargo.toml b/boa_gc/Cargo.toml index c055eda66d6..519349893e7 100644 --- a/boa_gc/Cargo.toml +++ b/boa_gc/Cargo.toml 
@@ -2,7 +2,7 @@ name = "boa_gc" version = "0.14.0" edition = "2021" -rust-version = "1.58" +rust-version = "1.60" authors = ["boa-dev"] description = "Garbage collector used in Boa." repository = "https://github.com/boa-dev/boa" diff --git a/boa_interner/Cargo.toml b/boa_interner/Cargo.toml index 53633c1f1b4..35aa1b4507f 100644 --- a/boa_interner/Cargo.toml +++ b/boa_interner/Cargo.toml @@ -2,7 +2,7 @@ name = "boa_interner" version = "0.14.0" edition = "2021" -rust-version = "1.58" +rust-version = "1.60" authors = ["boa-dev"] description = "String interner used in Boa." repository = "https://github.com/boa-dev/boa" diff --git a/boa_profiler/Cargo.toml b/boa_profiler/Cargo.toml index 9c0829f9295..fb31e3cb213 100644 --- a/boa_profiler/Cargo.toml +++ b/boa_profiler/Cargo.toml @@ -2,7 +2,7 @@ name = "boa_profiler" version = "0.14.0" edition = "2021" -rust-version = "1.58" +rust-version = "1.60" authors = ["boa-dev"] description = "Profiler used in Boa." repository = "https://github.com/boa-dev/boa" diff --git a/boa_tester/Cargo.toml b/boa_tester/Cargo.toml index bd5d2489323..ce971546e1e 100644 --- a/boa_tester/Cargo.toml +++ b/boa_tester/Cargo.toml @@ -2,7 +2,7 @@ name = "boa_tester" version = "0.14.0" edition = "2021" -rust-version = "1.58" +rust-version = "1.60" authors = ["boa-dev"] description = "Test runner for the Boa JavaScript engine." 
repository = "https://github.com/boa-dev/boa" @@ -12,7 +12,7 @@ license = "Unlicense/MIT" publish = false [dependencies] -boa_engine = { path = "../boa_engine", version = "0.14.0" } +boa_engine = { path = "../boa_engine", features = ["intl"], version = "0.14.0" } boa_interner = { path = "../boa_interner", version = "0.14.0" } structopt = "0.3.26" serde = { version = "1.0.137", features = ["derive"] } diff --git a/boa_unicode/Cargo.toml b/boa_unicode/Cargo.toml index 9127836216d..2e5517a7f18 100644 --- a/boa_unicode/Cargo.toml +++ b/boa_unicode/Cargo.toml @@ -2,7 +2,7 @@ name = "boa_unicode" version = "0.14.0" edition = "2021" -rust-version = "1.58" +rust-version = "1.60" authors = ["boa-dev"] description = "Unicode support for the Boa JavaScript engine." repository = "https://github.com/boa-dev/boa" diff --git a/boa_wasm/Cargo.toml b/boa_wasm/Cargo.toml index e769ecb1ce9..0f55fa3a26a 100644 --- a/boa_wasm/Cargo.toml +++ b/boa_wasm/Cargo.toml @@ -2,7 +2,7 @@ name = "boa_wasm" version = "0.14.0" edition = "2021" -rust-version = "1.58" +rust-version = "1.60" authors = ["boa-dev"] description = "WASM package for the Boa JavaScript engine." repository = "https://github.com/boa-dev/boa"