From 77458bbafa52bf3c9e9c5d3add6339d4281f1121 Mon Sep 17 00:00:00 2001 From: Elango Cheran Date: Thu, 21 Oct 2021 16:17:00 -0700 Subject: [PATCH] Initial code for connecting properties provider to the icu4x_datagen exporter tool --- Cargo.lock | 2 + components/properties/src/provider.rs | 10 ++-- provider/testdata/src/paths.rs | 5 ++ provider/uprops/src/enum_codepointtrie.rs | 19 +++++++ tools/datagen/Cargo.toml | 2 + tools/datagen/src/bin/datagen.rs | 65 +++++++++++++++++++++++ 6 files changed, 98 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6e2e5ca52c..28425d8f458 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1089,10 +1089,12 @@ dependencies = [ "clap", "futures", "icu_locid", + "icu_properties", "icu_provider", "icu_provider_blob", "icu_provider_cldr", "icu_provider_fs", + "icu_provider_uprops", "icu_testdata", "log", "reqwest", diff --git a/components/properties/src/provider.rs b/components/properties/src/provider.rs index d4e2d5abaa8..4c5f8896bad 100644 --- a/components/properties/src/provider.rs +++ b/components/properties/src/provider.rs @@ -22,19 +22,18 @@ pub mod key { /// Macro to help define resource keys and store them in a list. macro_rules! define_resource_keys { - ($count:expr; $(($k:ident, $s:literal)),+,) => { + ($allkeys:ident; $count:expr; $(($k:ident, $s:literal)),+,) => { $( #[allow(missing_docs)] // These constants don't need individual documentation. pub const $k: ResourceKey = resource_key!(UnicodeSet, $s, 1); )+ /// The set of all resource keys supported by [`icu_uniset`](crate). - pub const ALL_KEYS: [ResourceKey; $count] = [$($k,)+]; + pub const $allkeys: [ResourceKey; $count] = [$($k,)+]; }; } - define_resource_keys!(267; - + define_resource_keys!(ALL_SET_KEYS; 265; // // Binary property UnicodeSets // @@ -313,7 +312,9 @@ pub mod key { (SCRIPT_YEZIDI_V1, "sc=Yezi"), (SCRIPT_YI_V1, "sc=Yiii"), (SCRIPT_ZANABAZAR_SQUARE_V1, "sc=Zanb"), + ); + define_resource_keys!(ALL_MAP_KEYS; 2; // // Enumerated property CodePointMaps // @@ -322,7 +323,6 @@ pub mod key { (GENERAL_CATEGORY_V1, "gc"), (SCRIPT_V1, "sc"), - ); } diff --git a/provider/testdata/src/paths.rs b/provider/testdata/src/paths.rs index ed3cc0ac372..fa9a60cd485 100644 --- a/provider/testdata/src/paths.rs +++ b/provider/testdata/src/paths.rs @@ -14,6 +14,11 @@ pub fn cldr_json_root() -> PathBuf { data_root().join("cldr") } +/// Returns the absolute path to the UProps TOML root directory. +pub fn uprops_toml_root() -> PathBuf { + data_root().join("uprops") +} + /// Returns the absolute path to the ICU4X JSON root directory. pub fn icu4x_json_root() -> PathBuf { data_root().join("json") diff --git a/provider/uprops/src/enum_codepointtrie.rs b/provider/uprops/src/enum_codepointtrie.rs index a8cf5f2a6ad..8b5a0e39c37 100644 --- a/provider/uprops/src/enum_codepointtrie.rs +++ b/provider/uprops/src/enum_codepointtrie.rs @@ -8,8 +8,11 @@ use crate::uprops_serde::enumerated::EnumeratedPropertyCodePointTrie; use icu_codepointtrie::codepointtrie::{CodePointTrie, CodePointTrieHeader, TrieType, TrieValue}; use icu_properties::provider::{UnicodePropertyMapV1, UnicodePropertyMapV1Marker}; +use icu_provider::iter::IterableDataProviderCore; use icu_provider::prelude::*; use zerovec::ZeroVec; +use icu_properties::provider::*; +use icu_properties::{Script, GeneralSubcategory}; use core::convert::TryFrom; @@ -110,6 +113,22 @@ impl<'data, T: TrieValue> DataProvider<'data, UnicodePropertyMapV1Marker> } } +icu_provider::impl_dyn_provider!(EnumeratedPropertyCodePointTrieProvider, { + key::GENERAL_CATEGORY_V1 => UnicodePropertyMapV1Marker, + key::SCRIPT_V1 => UnicodePropertyMapV1Marker