From 5ab686cbe2c86ff359519c496198f992a560eb8a Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Mon, 14 Feb 2022 14:25:08 -0700 Subject: [PATCH] Add postcard support to FsDataProvider (#1606) --- .gitignore | 3 +- Cargo.lock | 1 + provider/fs/Cargo.toml | 3 +- provider/fs/src/export/serializers/mod.rs | 3 ++ .../fs/src/export/serializers/postcard.rs | 41 +++++++++++++++++++ tools/datagen/src/bin/datagen.rs | 5 +++ tools/scripts/data.toml | 17 ++++++++ 7 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 provider/fs/src/export/serializers/postcard.rs diff --git a/.gitignore b/.gitignore index 3c63338f000..ec9035f118e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,8 +21,9 @@ wasmpkg/ dhat-heap.json /benchmarks -# Do not check-in bincode test data +# Do not check-in binary file tree test data provider/testdata/data/bincode +provider/testdata/data/postcard tools/datagen/tests/testdata/work_log_bincode # Ignore irrelevant files that get generated on macOS diff --git a/Cargo.lock b/Cargo.lock index 46be9cb0c16..e12caa6e4e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1372,6 +1372,7 @@ dependencies = [ "icu_plurals", "icu_provider", "log", + "postcard", "serde", "serde-json-core", "serde_json", diff --git a/provider/fs/Cargo.toml b/provider/fs/Cargo.toml index a600d815ba7..216073d0028 100644 --- a/provider/fs/Cargo.toml +++ b/provider/fs/Cargo.toml @@ -47,6 +47,7 @@ writeable = { version = "0.3", path = "../../utils/writeable" } # Serializers serde_json = { version = "1.0", optional = true } bincode = { version = "1.3", optional = true } +postcard = { version = "0.7", features = ["use-std"], optional = true } # Dependencies for the export module log = { version = "0.4", optional = true } @@ -64,7 +65,7 @@ criterion = "0.3.3" deserialize_json = ["icu_provider/deserialize_json"] deserialize_bincode_1 = ["icu_provider/deserialize_bincode_1"] # Enables the "export" module and FilesystemExporter -export = ["static_assertions", "log", "serde_json", 
"bincode", "icu_provider/serialize"] +export = ["static_assertions", "log", "serde_json", "bincode", "postcard", "icu_provider/serialize"] bench = [] [lib] diff --git a/provider/fs/src/export/serializers/mod.rs b/provider/fs/src/export/serializers/mod.rs index 6f4105110ba..b1c953f3598 100644 --- a/provider/fs/src/export/serializers/mod.rs +++ b/provider/fs/src/export/serializers/mod.rs @@ -7,6 +7,9 @@ pub mod json; #[cfg(feature = "bincode")] pub mod bincode; +#[cfg(feature = "postcard")] +pub mod postcard; + use displaydoc::Display; use icu_provider::buf::BufferFormat; use std::io; diff --git a/provider/fs/src/export/serializers/postcard.rs b/provider/fs/src/export/serializers/postcard.rs new file mode 100644 index 00000000000..3e5a89899e0 --- /dev/null +++ b/provider/fs/src/export/serializers/postcard.rs @@ -0,0 +1,41 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::AbstractSerializer; +use super::Error; +use icu_provider::buf::BufferFormat; +use std::io; + +/// A serializer for Postcard. +pub struct Serializer; + +/// Options bag for initializing a [`postcard::Serializer`]. 
+#[non_exhaustive] +#[derive(Clone, Debug, PartialEq, Default)] +pub struct Options; + +impl AbstractSerializer for Serializer { + fn serialize( + &self, + obj: &dyn erased_serde::Serialize, + sink: &mut dyn io::Write, + ) -> Result<(), Error> { + let mut serializer = postcard::Serializer { + output: postcard::flavors::StdVec(Vec::new()), + }; + obj.erased_serialize(&mut <dyn erased_serde::Serializer>::erase(&mut serializer))?; + sink.write_all(&serializer.output.0)?; + Ok(()) + } + + fn get_buffer_format(&self) -> BufferFormat { + BufferFormat::Postcard07 + } +} + +impl Serializer { + pub fn new(_options: Options) -> Self { + Self {} + } +} diff --git a/tools/datagen/src/bin/datagen.rs b/tools/datagen/src/bin/datagen.rs index db417494788..f94a60c7463 100644 --- a/tools/datagen/src/bin/datagen.rs +++ b/tools/datagen/src/bin/datagen.rs @@ -83,6 +83,7 @@ fn main() -> eyre::Result<()> { .takes_value(true) .possible_value("json") .possible_value("bincode") + .possible_value("postcard") .help("File format syntax for data files."), ) .arg( @@ -341,6 +342,10 @@ fn get_fs_exporter(matches: &ArgMatches) -> eyre::Result<FilesystemExporter> { let options = serializers::bincode::Options::default(); Box::new(serializers::bincode::Serializer::new(options)) } + Some("postcard") => { + let options = serializers::postcard::Options::default(); + Box::new(serializers::postcard::Serializer::new(options)) + } _ => unreachable!(), }; diff --git a/tools/scripts/data.toml b/tools/scripts/data.toml index 40310705171..82a8dc0ab20 100644 --- a/tools/scripts/data.toml +++ b/tools/scripts/data.toml @@ -138,6 +138,23 @@ args = [ "--overwrite", ] +[tasks.testdata-build-postcard-tree] +description = "Build ICU4X Postcard data to a filesystem structure at provider/testdata/data/postcard, overwriting if already present. Useful for breaking out the postcard data into individual files for debugging." 
+category = "ICU4X Data" +command = "cargo" +args = [ + "run", + "--bin=icu4x-datagen", + "--features=experimental", + "--", + "--input-from-testdata", + "--out-testdata", + "--test-keys", + "--test-locales", + "--syntax=postcard", + "--overwrite", +] + [tasks.testdata-build-bincode-all] description = "Build ICU4X Bincode filesystem structure from the downloaded CLDR JSON, testing all available locales, and overwriting the existing ICU4X Bincode if present." category = "ICU4X Data"