Skip to content

Commit

Permalink
Add new Datagen API and CLI with FallbackOptions (#4710)
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc authored Mar 27, 2024
1 parent b0051af commit 7397d7d
Show file tree
Hide file tree
Showing 12 changed files with 888 additions and 204 deletions.
2 changes: 1 addition & 1 deletion ffi/dart/tools/datagen/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ fn main() {
} else {
Default::default()
})
.with_all_locales()
.with_locales_and_fallback([LocaleFamily::full()], Default::default())
.export(
&DatagenProvider::new_latest_tested(),
BlobExporter::new_with_sink(Box::new(
Expand Down
7 changes: 5 additions & 2 deletions ffi/dart/tools/datagen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,15 @@ fn main() -> eyre::Result<()> {
let locales = matches
.locales
.iter()
.map(|l| l.parse::<LanguageIdentifier>())
.map(|l| {
l.parse::<LanguageIdentifier>()
.map(LocaleFamily::with_descendants)
})
.collect::<Result<Vec<_>, _>>()?;

DatagenDriver::new()
.with_keys(keys)
.with_locales(locales)
.with_locales_and_fallback(locales, Default::default())
.export(
&ReexportableBlobDataProvider(BlobDataProvider::try_new_from_static_blob(
include_bytes!(concat!(core::env!("OUT_DIR"), "/all.blob")),
Expand Down
2 changes: 1 addition & 1 deletion provider/blob/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ log = { version = "0.4", optional = true }

[dev-dependencies]
icu_locid = { path = "../../components/locid", features = ["serde"] }
icu_datagen = { path = "../../provider/datagen", features = ["networking"] }
icu_datagen = { path = "../../provider/datagen", default-features = false, features = ["networking"] }

[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
criterion = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion provider/blob/tests/test_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const BLOB_V2: &[u8] = include_bytes!("data/v2.postcard");
fn run_driver(exporter: BlobExporter) -> Result<(), DataError> {
DatagenDriver::new()
.with_keys([icu_provider::hello_world::HelloWorldV1Marker::KEY])
.with_all_locales()
.with_locales_and_fallback([LocaleFamily::full()], Default::default())
.export(&DatagenProvider::new_custom(), exporter)
}

Expand Down
116 changes: 104 additions & 12 deletions provider/datagen/src/bin/datagen/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ enum CollationTable {
SearchAll,
}

// Mirrors crate::options::FallbackMode
// Mirrors crate::FallbackMode
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
enum Fallback {
Auto,
Expand All @@ -56,6 +56,20 @@ enum Fallback {
Preresolved,
}

// Mirrors crate::DeduplicationStrategy
//
// Command-line value type for the `--deduplication-strategy` option. It is
// translated variant-by-variant into `config::DeduplicationStrategy` by
// `Cli::make_deduplication_strategy`.
//
// NOTE(review): comments here are deliberately plain `//` rather than `///`;
// clap's `ValueEnum` derive is understood to surface variant doc comments as
// help text, which would alter the CLI output — confirm before converting.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
enum DeduplicationStrategy {
// Maps to `config::DeduplicationStrategy::Maximal`.
Maximal,
// Maps to `config::DeduplicationStrategy::None` (this is the enum's own
// variant, not `Option::None`).
None,
}

// Mirrors crate::RuntimeFallbackLocation
//
// Command-line value type for the `--runtime-fallback-location` option. It is
// translated variant-by-variant into `config::RuntimeFallbackLocation` by
// `Cli::make_runtime_fallback_location`.
//
// NOTE(review): comments here are deliberately plain `//` rather than `///`;
// clap's `ValueEnum` derive is understood to surface variant doc comments as
// help text, which would alter the CLI output — confirm before converting.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
enum RuntimeFallbackLocation {
// Maps to `config::RuntimeFallbackLocation::Internal`.
Internal,
// Maps to `config::RuntimeFallbackLocation::External`.
External,
}

impl CollationTable {
fn to_datagen_value(self) -> &'static str {
match self {
Expand Down Expand Up @@ -231,8 +245,35 @@ pub struct Cli {

// TODO(#2856): Change the default to Auto in 2.0
#[arg(short, long, value_enum, default_value_t = Fallback::Hybrid)]
#[arg(
hide = true,
help = "Deprecated: use --deduplication-strategy, --runtime-fallback-location, or --without-fallback"
)]
fallback: Fallback,

#[arg(long)]
#[arg(
help = "disables locale fallback, instead exporting exactly the locales specified in --locales. \
Cannot be used with --deduplication-strategy, --runtime-fallback-location"
)]
without_fallback: bool,

#[arg(long, value_enum)]
#[arg(help = "configures where runtime fallback should take place in code. \
If not set, determined by the exporter: \
internal fallback is used if the exporter supports it. \
Cannot be used with --without-fallback")]
runtime_fallback_location: Option<RuntimeFallbackLocation>,

#[arg(long, value_enum)]
#[arg(
help = "configures the deduplication of locales for exported data payloads. \
If not set, determined by `runtime_fallback_location`: \
if internal fallback is enabled, a more aggressive deduplication strategy is used. \
Cannot be used with --without-fallback"
)]
deduplication_strategy: Option<DeduplicationStrategy>,

#[arg(long, num_args = 0.., default_value = "recommended")]
#[arg(
help = "Include these segmenter models in the output. Accepts multiple arguments. \
Expand All @@ -246,6 +287,9 @@ impl Cli {
Ok(config::Config {
keys: self.make_keys()?,
locales: self.make_locales()?,
without_fallback: self.make_without_fallback(),
deduplication_strategy: self.make_deduplication_strategy()?,
runtime_fallback_location: self.make_runtime_fallback_location()?,
cldr: self.make_path(&self.cldr_root, &self.cldr_tag, "cldr-root")?,
icu_export: self.make_path(
&self.icuexport_root,
Expand All @@ -272,13 +316,6 @@ impl Cli {
.collect(),
segmenter_models: self.make_segmenter_models()?,
export: self.make_exporter()?,
fallback: match self.fallback {
Fallback::Auto => config::FallbackMode::PreferredForExporter,
Fallback::Hybrid => config::FallbackMode::Hybrid,
Fallback::Runtime => config::FallbackMode::Runtime,
Fallback::RuntimeManual => config::FallbackMode::RuntimeManual,
Fallback::Preresolved => config::FallbackMode::Preresolved,
},
overwrite: self.overwrite,
})
}
Expand Down Expand Up @@ -346,10 +383,7 @@ impl Cli {
config::LocaleInclude::Explicit(
self.locales
.iter()
.map(|s| {
s.parse::<LanguageIdentifier>()
.with_context(|| s.to_string())
})
.map(|s| s.parse::<LocaleFamily>().with_context(|| s.to_string()))
.collect::<Result<_, eyre::Error>>()?,
)
})
Expand All @@ -372,6 +406,64 @@ impl Cli {
})
}

/// Reports whether the export should run with locale fallback disabled.
///
/// This is the case when `--without-fallback` was given, or when the
/// deprecated `--fallback` option was set to `preresolved`.
fn make_without_fallback(&self) -> bool {
    if self.without_fallback {
        return true;
    }
    // Legacy spelling of the same request.
    self.fallback == Fallback::Preresolved
}

/// Resolves the deduplication strategy to place in the generated config.
///
/// Precedence:
/// 1. With `--without-fallback`, no strategy applies; passing
///    `--deduplication-strategy` as well is an error.
/// 2. Otherwise an explicit `--deduplication-strategy` wins.
/// 3. Otherwise the value is derived from the deprecated `--fallback` mode.
///
/// # Errors
///
/// Fails if both `--without-fallback` and `--deduplication-strategy` are set.
fn make_deduplication_strategy(&self) -> eyre::Result<Option<config::DeduplicationStrategy>> {
    // NOTE(review): only the `--without-fallback` flag is consulted here.
    // `--fallback preresolved` is also treated as "without fallback" by
    // `make_without_fallback`, yet combining it with an explicit strategy is
    // not rejected — confirm this is intended.
    if self.without_fallback {
        if self.deduplication_strategy.is_some() {
            eyre::bail!("cannot combine --without-fallback and --deduplication-strategy");
        }
        return Ok(None);
    }

    let resolved = match self.deduplication_strategy {
        // An explicit CLI choice is translated one-to-one.
        Some(DeduplicationStrategy::Maximal) => Some(config::DeduplicationStrategy::Maximal),
        Some(DeduplicationStrategy::None) => Some(config::DeduplicationStrategy::None),
        // Nothing explicit: fall back to what the legacy `--fallback` mode implies.
        None => match self.fallback {
            Fallback::Auto | Fallback::Preresolved => None,
            Fallback::Hybrid | Fallback::Runtime | Fallback::RuntimeManual => {
                Some(config::DeduplicationStrategy::None)
            }
        },
    };
    Ok(resolved)
}

/// Resolves the runtime fallback location to place in the generated config.
///
/// Precedence:
/// 1. With `--without-fallback`, no location applies; passing
///    `--runtime-fallback-location` as well is an error.
/// 2. Otherwise an explicit `--runtime-fallback-location` wins.
/// 3. Otherwise the value is derived from the deprecated `--fallback` mode.
///
/// # Errors
///
/// Fails if both `--without-fallback` and `--runtime-fallback-location` are set.
fn make_runtime_fallback_location(
    &self,
) -> eyre::Result<Option<config::RuntimeFallbackLocation>> {
    // NOTE(review): only the `--without-fallback` flag is consulted here.
    // `--fallback preresolved` is also treated as "without fallback" by
    // `make_without_fallback`, yet combining it with an explicit location is
    // not rejected — confirm this is intended.
    if self.without_fallback {
        if self.runtime_fallback_location.is_some() {
            eyre::bail!("cannot combine --without-fallback and --runtime-fallback-location");
        }
        return Ok(None);
    }

    let resolved = match self.runtime_fallback_location {
        // An explicit CLI choice is translated one-to-one.
        Some(RuntimeFallbackLocation::Internal) => Some(config::RuntimeFallbackLocation::Internal),
        Some(RuntimeFallbackLocation::External) => Some(config::RuntimeFallbackLocation::External),
        // Nothing explicit: fall back to what the legacy `--fallback` mode implies.
        None => match self.fallback {
            Fallback::Auto | Fallback::Preresolved => None,
            Fallback::Runtime => Some(config::RuntimeFallbackLocation::Internal),
            Fallback::Hybrid | Fallback::RuntimeManual => {
                Some(config::RuntimeFallbackLocation::External)
            }
        },
    };
    Ok(resolved)
}

fn make_segmenter_models(&self) -> eyre::Result<config::SegmenterModelInclude> {
Ok(if self.segmenter_models.as_slice() == ["none"] {
config::SegmenterModelInclude::None
Expand Down
12 changes: 9 additions & 3 deletions provider/datagen/src/bin/datagen/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

pub use icu_datagen::{CollationHanDatabase, CoverageLevel, FallbackMode, TrieType};
use icu_datagen::LocaleFamily;
pub use icu_datagen::{
CollationHanDatabase, CoverageLevel, DeduplicationStrategy, FallbackMode,
RuntimeFallbackLocation, TrieType,
};
pub use icu_locid::LanguageIdentifier;
use icu_provider::prelude::*;
use std::collections::{BTreeSet, HashSet};
Expand All @@ -12,8 +16,10 @@ use std::path::{Path, PathBuf};
#[serde(rename_all = "camelCase")]
pub struct Config {
pub keys: KeyInclude,
pub fallback: FallbackMode,
pub locales: LocaleInclude,
pub without_fallback: bool,
pub deduplication_strategy: Option<DeduplicationStrategy>,
pub runtime_fallback_location: Option<RuntimeFallbackLocation>,
#[serde(
default,
skip_serializing_if = "is_default",
Expand Down Expand Up @@ -81,7 +87,7 @@ pub enum LocaleInclude {
Recommended,
All,
None,
Explicit(#[serde(serialize_with = "sorted_set")] HashSet<LanguageIdentifier>),
Explicit(#[serde(serialize_with = "sorted_set")] HashSet<LocaleFamily>),
CldrSet(#[serde(serialize_with = "sorted_set")] HashSet<CoverageLevel>),
}

Expand Down
68 changes: 52 additions & 16 deletions provider/datagen/src/bin/datagen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use clap::Parser;
use eyre::WrapErr;
use icu_datagen::prelude::*;
use simple_logger::SimpleLogger;
use writeable::Writeable;

mod args;
pub mod config;
Expand Down Expand Up @@ -86,23 +87,58 @@ fn main() -> eyre::Result<()> {
config::KeyInclude::Explicit(set) => driver.with_keys(set),
config::KeyInclude::ForBinary(path) => driver.with_keys(icu_datagen::keys_from_bin(path)?),
};
driver = driver.with_fallback_mode(config.fallback);
driver = driver.with_additional_collations(config.additional_collations);
driver = match config.locales {
config::LocaleInclude::All => driver.with_all_locales(),
config::LocaleInclude::None => driver.with_locales([]),
config::LocaleInclude::Explicit(set) => driver.with_locales(set),
config::LocaleInclude::CldrSet(levels) => {
driver.with_locales(provider.locales_for_coverage_levels(levels.iter().copied())?)
}
config::LocaleInclude::Recommended => {
driver.with_locales(provider.locales_for_coverage_levels([
CoverageLevel::Modern,
CoverageLevel::Moderate,
CoverageLevel::Basic,
])?)
}
// Intermediate form of the configured locale selection, kept until it is known
// whether the driver is set up with or without fallback (the two paths below
// need different element types).
enum LanguageIdentifiersOrLocaleFamilies {
// Plain language identifiers: the empty list for `LocaleInclude::None`, or the
// locales the provider reports for the requested CLDR coverage levels.
LanguageIdentifiers(Vec<LanguageIdentifier>),
// Locale families listed explicitly in the config (`LocaleInclude::Explicit`).
LocaleFamilies(Vec<LocaleFamily>),
// `LocaleInclude::All`; rejected when running without fallback, otherwise
// turned into `LocaleFamily::full()`.
AllLocales,
}
use LanguageIdentifiersOrLocaleFamilies::*;
let locales_intermediate: LanguageIdentifiersOrLocaleFamilies = match config.locales {
config::LocaleInclude::All => AllLocales,
config::LocaleInclude::None => LanguageIdentifiers(vec![]),
config::LocaleInclude::Explicit(set) => LocaleFamilies(set.into_iter().collect()),
config::LocaleInclude::CldrSet(levels) => LanguageIdentifiers(
provider
.locales_for_coverage_levels(levels.iter().copied())?
.into_iter()
.collect(),
),
config::LocaleInclude::Recommended => LanguageIdentifiers(
provider
.locales_for_coverage_levels([
CoverageLevel::Modern,
CoverageLevel::Moderate,
CoverageLevel::Basic,
])?
.into_iter()
.collect(),
),
};
if config.without_fallback {
let locale_families = match locales_intermediate {
AllLocales => eyre::bail!("--without-fallback needs an explicit locale list"),
LanguageIdentifiers(lids) => lids,
LocaleFamilies(lfs) => lfs
.into_iter()
.map(|family| family.write_to_string().parse())
.collect::<Result<Vec<LanguageIdentifier>, icu_locid::ParserError>>()?,
};
driver = driver.with_locales_no_fallback(locale_families, Default::default());
} else {
let locale_families = match locales_intermediate {
AllLocales => vec![LocaleFamily::full()],
LanguageIdentifiers(lids) => lids
.into_iter()
.map(LocaleFamily::with_descendants)
.collect(),
LocaleFamilies(lfs) => lfs,
};
let mut options: FallbackOptions = Default::default();
options.deduplication_strategy = config.deduplication_strategy;
options.runtime_fallback_location = config.runtime_fallback_location;
driver = driver.with_locales_and_fallback(locale_families, options);
}
driver = driver.with_additional_collations(config.additional_collations);
driver = match config.segmenter_models {
config::SegmenterModelInclude::None => driver.with_segmenter_models([]),
config::SegmenterModelInclude::Recommended => driver.with_segmenter_models([
Expand Down
Loading

0 comments on commit 7397d7d

Please sign in to comment.