Skip to content

Commit

Permalink
Initial code for connecting properties provider to the icu4x_datagen …
Browse files Browse the repository at this point in the history
…exporter tool
  • Loading branch information
echeran committed Oct 21, 2021
1 parent 7256f16 commit 77458bb
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 5 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions components/properties/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,18 @@ pub mod key {

/// Macro to help define resource keys and store them in a list.
///
/// Each `($k, $s)` pair expands to a `pub const $k: ResourceKey` built with
/// `resource_key!(UnicodeSet, $s, 1)`. The constants are then gathered, in the
/// order given, into a `[ResourceKey; $count]` array constant, so `$count`
/// has to equal the number of pairs passed in.
macro_rules! define_resource_keys {
($count:expr; $(($k:ident, $s:literal)),+,) => {
($allkeys:ident; $count:expr; $(($k:ident, $s:literal)),+,) => {
$(
#[allow(missing_docs)] // These constants don't need individual documentation.
pub const $k: ResourceKey = resource_key!(UnicodeSet, $s, 1);
)+

/// The list of resource keys defined by this macro invocation, in declaration order.
pub const ALL_KEYS: [ResourceKey; $count] = [$($k,)+];
pub const $allkeys: [ResourceKey; $count] = [$($k,)+];
};
}

define_resource_keys!(267;

define_resource_keys!(ALL_SET_KEYS; 265;
//
// Binary property UnicodeSets
//
Expand Down Expand Up @@ -313,7 +312,9 @@ pub mod key {
(SCRIPT_YEZIDI_V1, "sc=Yezi"),
(SCRIPT_YI_V1, "sc=Yiii"),
(SCRIPT_ZANABAZAR_SQUARE_V1, "sc=Zanb"),
);

define_resource_keys!(ALL_MAP_KEYS; 2;
//
// Enumerated property CodePointMaps
//
Expand All @@ -322,7 +323,6 @@ pub mod key {

(GENERAL_CATEGORY_V1, "gc"),
(SCRIPT_V1, "sc"),

);
}

Expand Down
5 changes: 5 additions & 0 deletions provider/testdata/src/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ pub fn cldr_json_root() -> PathBuf {
data_root().join("cldr")
}

/// Returns the location of the UProps TOML data: the `uprops` directory
/// directly under [`data_root()`].
pub fn uprops_toml_root() -> PathBuf {
    let root = data_root();
    root.join("uprops")
}

/// Returns the absolute path to the ICU4X JSON root directory.
pub fn icu4x_json_root() -> PathBuf {
data_root().join("json")
Expand Down
19 changes: 19 additions & 0 deletions provider/uprops/src/enum_codepointtrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ use crate::uprops_serde::enumerated::EnumeratedPropertyCodePointTrie;

use icu_codepointtrie::codepointtrie::{CodePointTrie, CodePointTrieHeader, TrieType, TrieValue};
use icu_properties::provider::{UnicodePropertyMapV1, UnicodePropertyMapV1Marker};
use icu_provider::iter::IterableDataProviderCore;
use icu_provider::prelude::*;
use zerovec::ZeroVec;
use icu_properties::provider::*;
use icu_properties::{Script, GeneralSubcategory};

use core::convert::TryFrom;

Expand Down Expand Up @@ -110,6 +113,22 @@ impl<'data, T: TrieValue> DataProvider<'data, UnicodePropertyMapV1Marker<T>>
}
}

// Pairs each enumerated-property resource key with the marker type of the
// code point map it carries (general category and script), and forwards the
// `SERDE_SE` selector and the `'data` lifetime to `impl_dyn_provider!`.
// NOTE(review): the impls this generates depend on the macro definition in
// `icu_provider` — confirm there before relying on specifics.
icu_provider::impl_dyn_provider!(EnumeratedPropertyCodePointTrieProvider, {
key::GENERAL_CATEGORY_V1 => UnicodePropertyMapV1Marker<GeneralSubcategory>,
key::SCRIPT_V1 => UnicodePropertyMapV1Marker<Script>,
}, SERDE_SE, 'data);

impl IterableDataProviderCore for EnumeratedPropertyCodePointTrieProvider {
    /// Lists the resource options available for a key.
    ///
    /// The key argument is ignored: the returned iterator always yields exactly
    /// one item, `ResourceOptions::default()`.
    fn supported_options_for_key(
        &self,
        _resc_key: &ResourceKey,
    ) -> Result<Box<dyn Iterator<Item = ResourceOptions>>, DataError> {
        let only_default = core::iter::once(ResourceOptions::default());
        Ok(Box::new(only_default))
    }
}


#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 2 additions & 0 deletions tools/datagen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ anyhow = "1.0"
clap = "2.33"
futures = "0.3"
icu_locid = { version = "0.3", path = "../../components/locid", features = ["std"]}
icu_properties = { version = "0.3", path = "../../components/properties", features = ["std"]}
icu_provider = { version = "0.3", path = "../../provider/core", features = ["std"]}
icu_provider_blob = { version = "0.3", path = "../../provider/blob", features = ["export"] }
icu_provider_cldr = { version = "0.3", path = "../../provider/cldr", features = ["download"] }
icu_provider_fs = { version = "0.3", path = "../../provider/fs", features = ["export", "provider_json", "provider_bincode"] }
icu_provider_uprops = { version = "0.3", path = "../../provider/uprops" }
icu_testdata = { version = "0.3", path = "../../provider/testdata", features = ["metadata"] }
log = "0.4"
reqwest = { version = "0.11", features = ["json", "stream", "gzip"] }
Expand Down
65 changes: 65 additions & 0 deletions tools/datagen/src/bin/datagen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use anyhow::Context;
use clap::{App, Arg, ArgGroup, ArgMatches};
use icu_locid::LanguageIdentifier;
use icu_provider::prelude::*;
use icu_provider::export::DataExporter;
use icu_provider::filter::Filterable;
use icu_provider::hello_world::{self, HelloWorldProvider};
Expand All @@ -20,6 +21,8 @@ use icu_provider_fs::export::fs_exporter;
use icu_provider_fs::export::serializers;
use icu_provider_fs::export::FilesystemExporter;
use icu_provider_fs::manifest;
use icu_properties::provider::key::{ALL_SET_KEYS, ALL_MAP_KEYS};
use icu_provider_uprops::{EnumeratedPropertyCodePointTrieProvider, PropertiesDataProvider};
use simple_logger::SimpleLogger;
use std::collections::HashSet;
use std::path::PathBuf;
Expand Down Expand Up @@ -261,6 +264,8 @@ fn main() -> anyhow::Result<()> {
if matches.is_present("ALL_KEYS") || matches.is_present("KEYS") {
let keys = matches.values_of("KEYS").map(|values| values.collect());
export_cldr(&matches, exporter, locales_vec.as_deref(), keys.as_ref())?;
export_set_props(&matches, exporter, keys.as_ref())?;
export_map_props(&matches, exporter, keys.as_ref())?;
}

if matches.is_present("HELLO_WORLD") {
Expand Down Expand Up @@ -402,6 +407,66 @@ fn export_cldr<'data>(
Ok(())
}

/// Exports the requested UnicodeSet property keys through `exporter`.
///
/// The properties provider is built from the directory passed as the
/// `CLDR_ROOT` argument. When `allowed_keys` is `Some`, only the entries of
/// `ALL_SET_KEYS` whose string form is contained in the set are exported;
/// when it is `None`, every entry of `ALL_SET_KEYS` is exported.
///
/// # Errors
///
/// Fails if `CLDR_ROOT` was not supplied, or if exporting any key fails.
fn export_set_props<'data>(
    matches: &ArgMatches,
    exporter: &mut (impl DataExporter<'data, SerdeSeDataStructMarker> + ?Sized),
    allowed_keys: Option<&HashSet<&str>>,
) -> anyhow::Result<()> {
    let provider = match matches.value_of("CLDR_ROOT") {
        Some(path) => PropertiesDataProvider::new(PathBuf::from(path)),
        None => anyhow::bail!("Value for --cldr-root must be specified"),
    };

    // Borrow the key array with `iter()`. Calling `into_iter()` on an array
    // yields references only through deprecated edition-2018 behaviour and
    // yields owned values on edition 2021, where `.copied()` no longer compiles.
    let keys: Vec<ResourceKey> = match allowed_keys {
        Some(allowed) => ALL_SET_KEYS
            .iter()
            .filter(|k| allowed.contains(&*k.writeable_to_string()))
            .copied()
            .collect(),
        None => ALL_SET_KEYS.to_vec(),
    };

    for key in &keys {
        log::info!("Writing key: {}", key);
        icu_provider::export::export_from_iterable(key, &provider, exporter)?;
    }

    Ok(())
}

/// Exports the requested enumerated-property code point map keys through `exporter`.
///
/// The code point trie provider is built from the directory passed as the
/// `CLDR_ROOT` argument. When `allowed_keys` is `Some`, only the entries of
/// `ALL_MAP_KEYS` whose string form is contained in the set are exported;
/// when it is `None`, every entry of `ALL_MAP_KEYS` is exported.
///
/// # Errors
///
/// Fails if `CLDR_ROOT` was not supplied, or if exporting any key fails.
fn export_map_props<'data>(
    matches: &ArgMatches,
    exporter: &mut (impl DataExporter<'data, SerdeSeDataStructMarker> + ?Sized),
    allowed_keys: Option<&HashSet<&str>>,
) -> anyhow::Result<()> {
    let provider = match matches.value_of("CLDR_ROOT") {
        Some(path) => EnumeratedPropertyCodePointTrieProvider::new(PathBuf::from(path)),
        None => anyhow::bail!("Value for --cldr-root must be specified"),
    };

    // Borrow the key array with `iter()`. Calling `into_iter()` on an array
    // yields references only through deprecated edition-2018 behaviour and
    // yields owned values on edition 2021, where `.copied()` no longer compiles.
    let keys: Vec<ResourceKey> = match allowed_keys {
        Some(allowed) => ALL_MAP_KEYS
            .iter()
            .filter(|k| allowed.contains(&*k.writeable_to_string()))
            .copied()
            .collect(),
        None => ALL_MAP_KEYS.to_vec(),
    };

    for key in &keys {
        log::info!("Writing key: {}", key);
        icu_provider::export::export_from_iterable(key, &provider, exporter)?;
    }

    Ok(())
}

fn export_hello_world<'data>(
_: &ArgMatches,
exporter: &mut (impl DataExporter<'data, SerdeSeDataStructMarker> + ?Sized),
Expand Down

0 comments on commit 77458bb

Please sign in to comment.