Skip to content

Commit

Permalink
Add BlobSchema V2 with ZeroTrie (#4207)
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc authored Nov 2, 2023
1 parent ee7d02f commit f223e11
Show file tree
Hide file tree
Showing 15 changed files with 469 additions and 90 deletions.
1 change: 1 addition & 0 deletions .github/workflows/artifacts-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ jobs:
- components/segmenter
- experimental/transliterate
- experimental/zerotrie
- provider/blob
- utils/fixed_decimal
- utils/litemap
- utils/tinystr
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions provider/blob/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@ postcard = { version = "1.0.0", default-features = false, features = ["alloc"] }
serde = { version = "1.0", default-features = false, features = ["alloc"] }
writeable = {workspace = true }
zerovec = { workspace = true, features = ["serde", "yoke"] }
zerotrie = { workspace = true, features = ["serde", "zerovec"] }

log = { version = "0.4", optional = true }

[dev-dependencies]
icu_locid = { path = "../../components/locid", features = ["serde"] }
icu_datagen = { path = "../../provider/datagen", features = ["networking"] }

[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
criterion = "0.4"

[features]
std = ["icu_provider/std"]
export = [
Expand All @@ -40,4 +44,10 @@ export = [
"postcard/alloc",
"std",
"zerovec/serde",
"zerotrie/alloc",
"zerotrie/litemap",
]

[[bench]]
name = "blob_version_bench"
harness = false
62 changes: 62 additions & 0 deletions provider/blob/benches/blob_version_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

extern crate alloc;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use icu_provider::datagen::IterableDataProvider;
use icu_provider::hello_world::*;
use icu_provider::prelude::*;
use icu_provider_blob::BlobDataProvider;

/// Raw bytes of `tests/data/v1.postcard`, embedded into the benchmark binary at compile time.
/// NOTE(review): presumably a blob written with the V1 blob schema — confirm against how the test data is generated.
const BLOB_V1: &[u8] = include_bytes!("../tests/data/v1.postcard");
/// Raw bytes of `tests/data/v2.postcard`, embedded into the benchmark binary at compile time.
/// NOTE(review): presumably a blob written with the V2 blob schema — confirm against how the test data is generated.
const BLOB_V2: &[u8] = include_bytes!("../tests/data/v2.postcard");

/// Benchmarks [`BlobDataProvider`] against both embedded test blobs.
///
/// Two groups of benchmarks are registered on `c`, one entry per blob:
///
/// * `provider/construct/{v1,v2}`: builds a provider from the static blob via
///   `BlobDataProvider::try_new_from_static_blob`.
/// * `provider/read/{v1,v2}`: builds the provider once, outside the timed closure,
///   then calls `load_buffer` for `HelloWorldV1Marker::KEY` once per locale
///   reported by `HelloWorldProvider::supported_locales`.
///
/// # Panics
///
/// Panics if a blob fails to deserialize, if the supported locales cannot be
/// listed, or if any `load_buffer` call returns an error.
fn blob_version_bench(c: &mut Criterion) {
    // Each benchmark name is paired with the blob it measures in a single table, so
    // the "v2" benchmark cannot silently end up timing the V1 blob.
    let construct_cases = [
        ("provider/construct/v1", BLOB_V1),
        ("provider/construct/v2", BLOB_V2),
    ];
    for (name, blob) in construct_cases {
        c.bench_function(name, |b| {
            b.iter(|| BlobDataProvider::try_new_from_static_blob(black_box(blob)).unwrap());
        });
    }

    let hello_world_provider = HelloWorldProvider;
    let locales = hello_world_provider.supported_locales().unwrap();

    let read_cases = [
        ("provider/read/v1", BLOB_V1),
        ("provider/read/v2", BLOB_V2),
    ];
    for (name, blob) in read_cases {
        c.bench_function(name, |b| {
            // Construction happens outside `b.iter`, so only the lookups are timed.
            let provider = BlobDataProvider::try_new_from_static_blob(black_box(blob)).unwrap();
            b.iter(|| {
                for locale in black_box(&locales).iter() {
                    black_box(&provider)
                        .load_buffer(
                            HelloWorldV1Marker::KEY,
                            DataRequest {
                                locale,
                                metadata: Default::default(),
                            },
                        )
                        .unwrap();
                }
            });
        });
    }
}

// Register `blob_version_bench` under the `benches` group and let Criterion provide the
// binary's entry point (the `[[bench]]` target for this file sets `harness = false`).
criterion_group!(benches, blob_version_bench,);
criterion_main!(benches);
133 changes: 77 additions & 56 deletions provider/blob/src/blob_data_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::blob_schema::{BlobSchema, BlobSchemaV1};
use crate::blob_schema::BlobSchema;
use alloc::boxed::Box;
use icu_provider::buf::BufferFormat;
use icu_provider::prelude::*;
Expand Down Expand Up @@ -38,7 +38,7 @@ use yoke::*;
/// // Read an ICU4X data blob dynamically:
/// let blob = std::fs::read(concat!(
/// env!("CARGO_MANIFEST_DIR"),
/// "/tests/data/hello_world.postcard",
/// "/tests/data/v2.postcard",
/// ))
/// .expect("Reading pre-computed postcard buffer");
///
Expand Down Expand Up @@ -69,7 +69,7 @@ use yoke::*;
/// // Read an ICU4X data blob statically:
/// const HELLO_WORLD_BLOB: &[u8] = include_bytes!(concat!(
/// env!("CARGO_MANIFEST_DIR"),
/// "/tests/data/hello_world.postcard"
/// "/tests/data/v2.postcard"
/// ));
///
/// // Create a DataProvider from it:
Expand All @@ -87,7 +87,7 @@ use yoke::*;
/// ```
#[derive(Clone)]
pub struct BlobDataProvider {
data: Yoke<BlobSchemaV1<'static>, Option<Cart>>,
data: Yoke<BlobSchema<'static>, Option<Cart>>,
}

impl core::fmt::Debug for BlobDataProvider {
Expand All @@ -103,7 +103,7 @@ impl BlobDataProvider {
pub fn try_new_from_blob(blob: Box<[u8]>) -> Result<Self, DataError> {
Ok(Self {
data: Cart::try_make_yoke(blob, |bytes| {
BlobSchema::deserialize_v1(&mut postcard::Deserializer::from_bytes(bytes))
BlobSchema::deserialize_and_check(&mut postcard::Deserializer::from_bytes(bytes))
})?,
})
}
Expand All @@ -112,7 +112,7 @@ impl BlobDataProvider {
/// [`try_new_from_blob`](BlobDataProvider::try_new_from_blob) and is allocation-free.
pub fn try_new_from_static_blob(blob: &'static [u8]) -> Result<Self, DataError> {
Ok(Self {
data: Yoke::new_owned(BlobSchema::deserialize_v1(
data: Yoke::new_owned(BlobSchema::deserialize_and_check(
&mut postcard::Deserializer::from_bytes(blob),
)?),
})
Expand Down Expand Up @@ -150,61 +150,82 @@ mod test {

#[test]
fn test_empty() {
let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));

exporter.flush(HelloWorldV1Marker::KEY).unwrap();

exporter.close().unwrap();
for version in [1, 2] {
let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = if version == 1 {
BlobExporter::new_with_sink(Box::new(&mut blob))
} else {
BlobExporter::new_v2_with_sink(Box::new(&mut blob))
};

exporter.flush(HelloWorldV1Marker::KEY).unwrap();

exporter.close().unwrap();
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(
matches!(
provider.load_buffer(HelloWorldV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
),
"(version: {version})"
);
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(matches!(
provider.load_buffer(HelloWorldV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
));
}

#[test]
fn test_singleton() {
let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));

exporter.flush(HelloSingletonV1Marker::KEY).unwrap();

exporter.close().unwrap();
for version in [1, 2] {
let mut blob: Vec<u8> = Vec::new();

{
let mut exporter = if version == 1 {
BlobExporter::new_with_sink(Box::new(&mut blob))
} else {
BlobExporter::new_v2_with_sink(Box::new(&mut blob))
};

exporter.flush(HelloSingletonV1Marker::KEY).unwrap();

exporter.close().unwrap();
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(
matches!(
provider.load_buffer(
HelloSingletonV1Marker::KEY,
DataRequest {
locale: &icu_locid::locale!("de").into(),
metadata: Default::default()
}
),
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
})
),
"(version: {version})"
);

assert!(
matches!(
provider.load_buffer(HelloSingletonV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
),
"(version: {version})"
);
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(matches!(
provider.load_buffer(
HelloSingletonV1Marker::KEY,
DataRequest {
locale: &icu_locid::locale!("de").into(),
metadata: Default::default()
}
),
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
})
));

assert!(matches!(
provider.load_buffer(HelloSingletonV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
));
}
}
Loading

0 comments on commit f223e11

Please sign in to comment.