Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use TinyAsciiStr instead of &'static str for baked data #5159

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions provider/baked/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ itertools = { workspace = true, optional = true }
log = { workspace = true, optional = true }
proc-macro2 = { workspace = true, optional = true }
heck = { workspace = true, optional = true }
tinystr.workspace = true

[dev-dependencies]
icu_provider = { path = "../core", features = ["export"] }
Expand Down
112 changes: 71 additions & 41 deletions provider/baked/src/binary_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@

//! Data stored as slices, looked up with binary search

pub extern crate tinystr;

use alloc::borrow::ToOwned;
#[cfg(feature = "export")]
use databake::*;
use icu_provider::prelude::*;
use tinystr::TinyAsciiStr;

#[cfg(feature = "export")]
pub(crate) fn bake(
Expand All @@ -17,7 +21,18 @@ pub(crate) fn bake(
let mut size = 0;

// Data.0 is a fat pointer
size += core::mem::size_of::<&[()]>();
size += core::mem::size_of::<Data<Locale<1>, icu_provider::hello_world::HelloWorldV1Marker>>();

let max_attributes_len = ids_to_idents
.iter()
.map(|(id, _)| id.marker_attributes.len())
.max()
.unwrap();
let max_locale_len = ids_to_idents
.iter()
.map(|(id, _)| id.locale.to_string().len())
.max()
.unwrap();

// The idents are references
size += ids_to_idents.len() * core::mem::size_of::<&()>();
Expand All @@ -29,50 +44,55 @@ pub(crate) fn bake(
)
});

let (ty, id_bakes_to_idents) = if ids_to_idents
let (ty, keys) = if ids_to_idents
.iter()
.all(|(id, _)| id.marker_attributes.is_empty())
{
// Only DataLocales
size += ids_to_idents.len() * core::mem::size_of::<&str>();
size += ids_to_idents.len() * max_locale_len;
(
quote! { icu_provider_baked::binary_search::Locale },
quote! { icu_provider_baked::binary_search::Locale<#max_locale_len> },
ids_to_idents
.iter()
.map(|(id, ident)| {
.map(|(id, _)| {
let k = id.locale.to_string();
quote!((#k, #ident))
quote!(tinystr!(#max_locale_len, #k))
})
.collect::<Vec<_>>(),
)
} else if ids_to_idents.iter().all(|(id, _)| id.locale.is_und()) {
} else if ids_to_idents.iter().all(|(id, _)| id.locale.is_empty()) {
// Only marker attributes
size += ids_to_idents.len() * core::mem::size_of::<&str>();
size += ids_to_idents.len() * max_attributes_len;
(
quote! { icu_provider_baked::binary_search::Attributes },
quote! { icu_provider_baked::binary_search::Attributes<#max_attributes_len> },
ids_to_idents
.iter()
.map(|(id, ident)| {
.map(|(id, _)| {
let k = id.marker_attributes.as_str();
quote!((#k, #ident))
quote!(tinystr!(#max_attributes_len, #k))
})
.collect(),
)
} else {
size += ids_to_idents.len() * 2 * core::mem::size_of::<&str>();
size += ids_to_idents.len() * (max_attributes_len + max_locale_len);
(
quote! { icu_provider_baked::binary_search::AttributesAndLocale },
quote! { icu_provider_baked::binary_search::AttributesAndLocale<#max_attributes_len, #max_locale_len> },
ids_to_idents
.iter()
.map(|(id, ident)| {
.map(|(id, _)| {
let k0 = id.marker_attributes.as_str();
let k1 = id.locale.to_string();
quote!(((#k0, #k1), #ident))
quote!((
tinystr!(#max_attributes_len, #k0),
tinystr!(#max_locale_len, #k1)
))
})
.collect(),
)
};

let values = ids_to_idents.iter().map(|(_, ident)| ident);

let idents_to_bakes = idents_to_bakes.into_iter().map(|(ident, bake)| {
quote! {
const #ident: &S = &#bake;
Expand All @@ -81,32 +101,41 @@ pub(crate) fn bake(

(
quote! {
icu_provider_baked::binary_search::Data<#ty, #marker_bake> = {
type S = <#marker_bake as icu_provider::DynamicDataMarker>::Yokeable;
#(#idents_to_bakes)*
icu_provider_baked::binary_search::Data(&[#(#id_bakes_to_idents,)*])
}
icu_provider_baked::binary_search::Data<#ty, #marker_bake> = icu_provider_baked::binary_search::Data(
{
use icu_provider_baked::binary_search::tinystr::tinystr;
&[#(#keys,)*]
},
{
type S = <#marker_bake as icu_provider::DynamicDataMarker>::Yokeable;
#(#idents_to_bakes)*
&[#(#values,)*]
},
)
},
size,
)
}

/// Single-slice layout: every entry pairs a key with the reference to its payload.
pub struct Data<K: BinarySearchKey, M: DataMarker>(pub &'static [(K::Type, &'static M::Yokeable)]);
/// Two-slice layout: keys and payload references live in separate, parallel slices.
///
/// `DataStore::get` binary-searches field `0` with `K::cmp` and uses the position it
/// finds to read field `1` without a bounds check, so the keys are expected to be
/// ordered consistently with `K::cmp` and both slices are expected to have the same
/// length.
pub struct Data<K: BinarySearchKey, M: DataMarker>(
/// Lookup keys.
pub &'static [K::Type],
/// Payload references; entry `i` belongs to key `i`.
pub &'static [&'static M::Yokeable],
);

impl<K: BinarySearchKey, M: DataMarker> super::DataStore<M> for Data<K, M> {
/// Returns the payload stored for `id`, or `None` when the binary search finds no
/// key that `K::cmp` reports as equal to `id`.
fn get(&self, id: DataIdentifierBorrowed) -> Option<&'static M::Yokeable> {
self.0
// Single-slice layout: search the `(key, payload)` entries and take the payload
// out of the entry that was hit. The index comes from searching `self.0` itself.
.binary_search_by(|&(k, _)| K::cmp(k, id))
.map(|i| unsafe { self.0.get_unchecked(i) }.1)
// Two-slice layout: search the key slice, then read the payload slice at the
// same position.
.binary_search_by(|&k| K::cmp(k, id))
// NOTE(review): `i` is only proven in bounds for `self.0`. The unchecked read of
// `self.1` is sound only if both slices have the same length, and nothing visible
// here enforces that (both fields are `pub`) — confirm the generator guarantees it.
.map(|i| *unsafe { self.1.get_unchecked(i) })
// A miss is reported as `Err(_)` by `binary_search_by`; `.ok()` turns it into `None`.
.ok()
}

/// Concrete type returned by `iter`: the slice iterator over `self.0` mapped through
/// a plain `fn` pointer that produces identifiers.
type IterReturn = core::iter::Map<
core::slice::Iter<'static, (K::Type, &'static M::Yokeable)>,
fn(&'static (K::Type, &'static M::Yokeable)) -> DataIdentifierCow<'static>,
core::slice::Iter<'static, K::Type>,
fn(&'static K::Type) -> DataIdentifierCow<'static>,
>;
/// Yields one identifier per stored key, in slice order, by passing each key to
/// `K::to_id`. Payloads are not touched.
fn iter(&self) -> Self::IterReturn {
self.0.iter().map(|&(k, _)| K::to_id(k))
self.0.iter().map(|&k| K::to_id(k))
}
}

Expand All @@ -117,10 +146,10 @@ pub trait BinarySearchKey: 'static {
fn to_id(k: Self::Type) -> DataIdentifierCow<'static>;
}

/// Marker type for the locale-only key kind; the `&'static str` variant of its
/// `BinarySearchKey` impl stores each key as a string slice.
pub struct Locale;
/// Marker type for the locale-only key kind; its `BinarySearchKey` impl stores each
/// key as a `TinyAsciiStr<N>`. The generator passes the longest locale string's
/// length as `N`.
pub struct Locale<const N: usize>;

impl BinarySearchKey for Locale {
type Type = &'static str;
impl<const N: usize> BinarySearchKey for Locale<N> {
type Type = TinyAsciiStr<N>;

fn cmp(locale: Self::Type, id: DataIdentifierBorrowed) -> core::cmp::Ordering {
id.locale.strict_cmp(locale.as_bytes()).reverse()
Expand All @@ -131,36 +160,37 @@ impl BinarySearchKey for Locale {
}
}

/// Marker type for the attributes-only key kind (`&'static str` keys).
pub struct Attributes;
/// Marker type for the attributes-only key kind (`TinyAsciiStr<N>` keys). The
/// generator passes the longest marker-attributes string's length as `N`.
pub struct Attributes<const N: usize>;

// `&'static str` variant of the impl.
impl BinarySearchKey for Attributes {
type Type = &'static str;
// `TinyAsciiStr<N>` variant of the impl.
impl<const N: usize> BinarySearchKey for Attributes<N> {
type Type = TinyAsciiStr<N>;

/// Orders a stored key relative to the marker attributes of the requested `id`
/// (key on the left, request on the right). The locale of `id` is not consulted.
fn cmp(attributes: Self::Type, id: DataIdentifierBorrowed) -> core::cmp::Ordering {
attributes.cmp(id.marker_attributes)
// The tiny string is viewed as `&str` before the comparison.
attributes.as_str().cmp(id.marker_attributes)
}

/// Builds the identifier that corresponds to a stored key; only the marker
/// attributes are supplied.
///
/// NOTE(review): `from_str_or_panic` presumably panics on a string that is not a
/// valid attributes value — confirm against `DataMarkerAttributes`.
fn to_id(attributes: Self::Type) -> DataIdentifierCow<'static> {
// `&'static str` variant: the key is handed over as-is.
DataIdentifierCow::from_marker_attributes(DataMarkerAttributes::from_str_or_panic(
attributes,
))
// `TinyAsciiStr` variant: the key is a by-value parameter, so `as_str()` borrows
// only for the duration of this call; the attributes are converted with
// `to_owned()` before going into the `'static` identifier.
DataIdentifierCow::from_marker_attributes_owned(
DataMarkerAttributes::from_str_or_panic(attributes.as_str()).to_owned(),
)
}
}

/// Marker type for the combined key kind: each key is an `(attributes, locale)` pair
/// of string slices.
pub struct AttributesAndLocale;
/// Marker type for the combined key kind: each key is a
/// `(TinyAsciiStr<N>, TinyAsciiStr<M>)` pair, attributes first and locale second.
/// The generator passes the longest attributes length as `N` and the longest locale
/// length as `M`.
pub struct AttributesAndLocale<const N: usize, const M: usize>;

impl BinarySearchKey for AttributesAndLocale {
type Type = (&'static str, &'static str);
impl<const N: usize, const M: usize> BinarySearchKey for AttributesAndLocale<N, M> {
type Type = (TinyAsciiStr<N>, TinyAsciiStr<M>);

/// Orders a stored `(attributes, locale)` key relative to the requested `id`:
/// attributes decide first, and the locale is compared only when they are equal.
fn cmp((attributes, locale): Self::Type, id: DataIdentifierBorrowed) -> core::cmp::Ordering {
attributes
.as_str()
.cmp(id.marker_attributes)
// `strict_cmp` is called on the request's locale with the key's bytes, i.e. request
// versus key; `.reverse()` flips it to key versus request so it has the same
// orientation as the attributes comparison above.
.then_with(|| id.locale.strict_cmp(locale.as_bytes()).reverse())
}

/// Builds the identifier that corresponds to a stored `(attributes, locale)` key.
///
/// Panics if the stored locale string fails to parse (`unwrap`).
/// NOTE(review): `from_str_or_panic` presumably also panics on an invalid attributes
/// string — confirm against `DataMarkerAttributes`.
fn to_id((attributes, locale): Self::Type) -> DataIdentifierCow<'static> {
// String-slice variant: attributes are passed through directly.
DataIdentifierCow::from_borrowed_and_owned(
DataMarkerAttributes::from_str_or_panic(attributes),
// `TinyAsciiStr` variant: attributes are converted with `to_owned()` because
// `as_str()` only borrows from the by-value key for the duration of this call.
DataIdentifierCow::from_owned(
DataMarkerAttributes::from_str_or_panic(attributes.as_str()).to_owned(),
// The locale is re-parsed from its stored string form.
locale.parse().unwrap(),
)
}
Expand Down
2 changes: 1 addition & 1 deletion provider/baked/tests/data/fingerprints.csv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
core/helloworld@1, <lookup>, 1096B, 27 identifiers
core/helloworld@1, <lookup>, 869B, 27 identifiers
70 changes: 38 additions & 32 deletions provider/baked/tests/data/hello_world_v1_marker.rs.data
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
/// `icu`'s `_unstable` constructors.
///
/// Using this implementation will embed the following data in the binary's data segment:
/// * 1096B[^1] for the lookup data structure (27 data identifiers)
/// * 869B[^1] for the lookup data structure (27 data identifiers)
/// * 1100B[^1] for the actual data (27 unique structs)
///
/// [^1]: these numbers can be smaller in practice due to linker deduplication
Expand All @@ -16,37 +16,43 @@ macro_rules! __impl_hello_world_v1_marker {
const _: () = <$provider>::MUST_USE_MAKE_PROVIDER_MACRO;
#[clippy::msrv = "1.70"]
impl $provider {
const DATA_HELLO_WORLD_V1_MARKER: icu_provider_baked::binary_search::Data<icu_provider_baked::binary_search::AttributesAndLocale, icu_provider::hello_world::HelloWorldV1Marker> = {
type S = <icu_provider::hello_world::HelloWorldV1Marker as icu_provider::DynamicDataMarker>::Yokeable;
const _REVERSE_EN: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Olleh Dlrow") };
const _REVERSE_JA: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("界世はちにんこ") };
const __BN: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("ওহে বিশ\u{9cd}ব") };
const __CS: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Ahoj světe") };
const __DE: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hallo Welt") };
const __DE_AT: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Servus Welt") };
const __EL: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Καλημέρα κόσμε") };
const __EN: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello World") };
const __EN_001: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🗺\u{fe0f}") };
const __EN_002: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🌍") };
const __EN_019: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🌎") };
const __EN_142: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🌏") };
const __EN_GB: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🇬🇧") };
const __EN_GB_U_SD_GBENG: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🏴\u{e0067}\u{e0062}\u{e0065}\u{e006e}\u{e0067}\u{e007f}") };
const __EO: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Saluton, Mondo") };
const __FA: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("سلام دنیا\u{200e}") };
const __FI: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("hei maailma") };
const __IS: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Halló, heimur") };
const __JA: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("こんにちは世界") };
const __LA: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Ave, munde") };
const __PT: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Olá, mundo") };
const __RO: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Salut, lume") };
const __RU: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Привет, мир") };
const __SR: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Поздрав свете") };
const __SR_LATN: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Pozdrav svete") };
const __VI: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Xin chào thế giới") };
const __ZH: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("你好世界") };
icu_provider_baked::binary_search::Data(&[(("", "bn"), __BN), (("", "cs"), __CS), (("", "de"), __DE), (("", "de-AT"), __DE_AT), (("", "el"), __EL), (("", "en"), __EN), (("", "en-001"), __EN_001), (("", "en-002"), __EN_002), (("", "en-019"), __EN_019), (("", "en-142"), __EN_142), (("", "en-GB"), __EN_GB), (("", "en-GB-u-sd-gbeng"), __EN_GB_U_SD_GBENG), (("", "eo"), __EO), (("", "fa"), __FA), (("", "fi"), __FI), (("", "is"), __IS), (("", "ja"), __JA), (("", "la"), __LA), (("", "pt"), __PT), (("", "ro"), __RO), (("", "ru"), __RU), (("", "sr"), __SR), (("", "sr-Latn"), __SR_LATN), (("", "vi"), __VI), (("", "zh"), __ZH), (("reverse", "en"), _REVERSE_EN), (("reverse", "ja"), _REVERSE_JA)])
};
const DATA_HELLO_WORLD_V1_MARKER: icu_provider_baked::binary_search::Data<icu_provider_baked::binary_search::AttributesAndLocale<7usize, 16usize>, icu_provider::hello_world::HelloWorldV1Marker> = icu_provider_baked::binary_search::Data(
{
use icu_provider_baked::binary_search::tinystr::tinystr;
&[(tinystr!(7usize, ""), tinystr!(16usize, "bn")), (tinystr!(7usize, ""), tinystr!(16usize, "cs")), (tinystr!(7usize, ""), tinystr!(16usize, "de")), (tinystr!(7usize, ""), tinystr!(16usize, "de-AT")), (tinystr!(7usize, ""), tinystr!(16usize, "el")), (tinystr!(7usize, ""), tinystr!(16usize, "en")), (tinystr!(7usize, ""), tinystr!(16usize, "en-001")), (tinystr!(7usize, ""), tinystr!(16usize, "en-002")), (tinystr!(7usize, ""), tinystr!(16usize, "en-019")), (tinystr!(7usize, ""), tinystr!(16usize, "en-142")), (tinystr!(7usize, ""), tinystr!(16usize, "en-GB")), (tinystr!(7usize, ""), tinystr!(16usize, "en-GB-u-sd-gbeng")), (tinystr!(7usize, ""), tinystr!(16usize, "eo")), (tinystr!(7usize, ""), tinystr!(16usize, "fa")), (tinystr!(7usize, ""), tinystr!(16usize, "fi")), (tinystr!(7usize, ""), tinystr!(16usize, "is")), (tinystr!(7usize, ""), tinystr!(16usize, "ja")), (tinystr!(7usize, ""), tinystr!(16usize, "la")), (tinystr!(7usize, ""), tinystr!(16usize, "pt")), (tinystr!(7usize, ""), tinystr!(16usize, "ro")), (tinystr!(7usize, ""), tinystr!(16usize, "ru")), (tinystr!(7usize, ""), tinystr!(16usize, "sr")), (tinystr!(7usize, ""), tinystr!(16usize, "sr-Latn")), (tinystr!(7usize, ""), tinystr!(16usize, "vi")), (tinystr!(7usize, ""), tinystr!(16usize, "zh")), (tinystr!(7usize, "reverse"), tinystr!(16usize, "en")), (tinystr!(7usize, "reverse"), tinystr!(16usize, "ja"))]
},
{
type S = <icu_provider::hello_world::HelloWorldV1Marker as icu_provider::DynamicDataMarker>::Yokeable;
const _REVERSE_EN: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Olleh Dlrow") };
const _REVERSE_JA: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("界世はちにんこ") };
const __BN: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("ওহে বিশ\u{9cd}ব") };
const __CS: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Ahoj světe") };
const __DE: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hallo Welt") };
const __DE_AT: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Servus Welt") };
const __EL: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Καλημέρα κόσμε") };
const __EN: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello World") };
const __EN_001: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🗺\u{fe0f}") };
const __EN_002: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🌍") };
const __EN_019: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🌎") };
const __EN_142: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🌏") };
const __EN_GB: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🇬🇧") };
const __EN_GB_U_SD_GBENG: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Hello from 🏴\u{e0067}\u{e0062}\u{e0065}\u{e006e}\u{e0067}\u{e007f}") };
const __EO: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Saluton, Mondo") };
const __FA: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("سلام دنیا\u{200e}") };
const __FI: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("hei maailma") };
const __IS: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Halló, heimur") };
const __JA: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("こんにちは世界") };
const __LA: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Ave, munde") };
const __PT: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Olá, mundo") };
const __RO: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Salut, lume") };
const __RU: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Привет, мир") };
const __SR: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Поздрав свете") };
const __SR_LATN: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Pozdrav svete") };
const __VI: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("Xin chào thế giới") };
const __ZH: &S = &icu_provider::hello_world::HelloWorldV1 { message: alloc::borrow::Cow::Borrowed("你好世界") };
&[__BN, __CS, __DE, __DE_AT, __EL, __EN, __EN_001, __EN_002, __EN_019, __EN_142, __EN_GB, __EN_GB_U_SD_GBENG, __EO, __FA, __FI, __IS, __JA, __LA, __PT, __RO, __RU, __SR, __SR_LATN, __VI, __ZH, _REVERSE_EN, _REVERSE_JA]
},
);
}
#[clippy::msrv = "1.70"]
impl icu_provider::DataProvider<icu_provider::hello_world::HelloWorldV1Marker> for $provider {
Expand Down
Loading
Loading