-
Notifications
You must be signed in to change notification settings - Fork 182
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use TinyAsciiStr
instead of &'static str
for baked data
#5159
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems plausible; can you measure the actual compiled binary size before and after?
@@ -42,7 +42,8 @@ macro_rules! __impl_collation_reordering_v1_marker { | |||
const __TH: &S = &icu::collator::provider::CollationReorderingV1 { min_high_no_reorder: 1929379840u32, reorder_table: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0B\x0C\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopq'stuvwxyz{|}~\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF") }, reorder_ranges: zerovec::ZeroVec::new() }; | |||
const __YUE_U_CO_STROKE: &S = &icu::collator::provider::CollationReorderingV1 { min_high_no_reorder: 4261412864u32, reorder_table: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0B\x0C\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'(\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\0\xF9\xFA'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xFE\xFF") }, reorder_ranges: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b",\0v{}\0\0~\xA9\xFF\0\xFE") } }; | |||
const __ZH: &S = &icu::collator::provider::CollationReorderingV1 { min_high_no_reorder: 4261412864u32, reorder_table: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0B\x0C\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'(\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xFE\xFF") }, reorder_ranges: zerovec::ZeroVec::new() }; | |||
icu_provider_baked::binary_search::Data(&[("am", __AM), ("ar", __AR), ("as", __AS), ("az", __AZ), ("be", __BE), ("bg", __BE), ("bn", __AS), ("bs", __AZ), ("bs-Cyrl", __BE), ("chr", __CHR), ("el", __EL), ("fa", __AR), ("gu", __GU), ("he", __HE), ("hi", __HI), ("hr", __AZ), ("hy", __HY), ("ja", __JA), ("ka", __KA), ("kk", __BE), ("km", __KM), ("kn", __KN), ("ko", __KO), ("kok", __HI), ("ku", __KU), ("ky", __BE), ("lo", __LO), ("mk", __BE), ("ml", __ML), ("mn", __MN), ("mr", __HI), ("my", __MY), ("ne", __NE), ("or", __OR), ("pa", __PA), ("ps", __AR), ("ru", __BE), ("si", __SI), ("sr", __BE), ("sr-Latn", __AZ), ("ta", __TA), ("te", __TE), ("th", __TH), ("ug", __AR), ("uk", __BE), ("ur", __AR), ("yue-u-co-stroke", __YUE_U_CO_STROKE), ("yue-u-co-unihan", __YUE_U_CO_STROKE), ("yue-u-co-zhuyin", __YUE_U_CO_STROKE), ("zh", __ZH), ("zh-Hant-u-co-stroke", __YUE_U_CO_STROKE), ("zh-Hant-u-co-unihan", __YUE_U_CO_STROKE), ("zh-Hant-u-co-zhuyin", __YUE_U_CO_STROKE), ("zh-u-co-stroke", __YUE_U_CO_STROKE), ("zh-u-co-unihan", __YUE_U_CO_STROKE), ("zh-u-co-zhuyin", __YUE_U_CO_STROKE)]) | |||
use icu_provider_baked::binary_search::tinystr::tinystr; | |||
icu_provider_baked::binary_search::Data(&[(tinystr!(19usize, "am"), __AM), (tinystr!(19usize, "ar"), __AR), (tinystr!(19usize, "as"), __AS), (tinystr!(19usize, "az"), __AZ), (tinystr!(19usize, "be"), __BE), (tinystr!(19usize, "bg"), __BE), (tinystr!(19usize, "bn"), __AS), (tinystr!(19usize, "bs"), __AZ), (tinystr!(19usize, "bs-Cyrl"), __BE), (tinystr!(19usize, "chr"), __CHR), (tinystr!(19usize, "el"), __EL), (tinystr!(19usize, "fa"), __AR), (tinystr!(19usize, "gu"), __GU), (tinystr!(19usize, "he"), __HE), (tinystr!(19usize, "hi"), __HI), (tinystr!(19usize, "hr"), __AZ), (tinystr!(19usize, "hy"), __HY), (tinystr!(19usize, "ja"), __JA), (tinystr!(19usize, "ka"), __KA), (tinystr!(19usize, "kk"), __BE), (tinystr!(19usize, "km"), __KM), (tinystr!(19usize, "kn"), __KN), (tinystr!(19usize, "ko"), __KO), (tinystr!(19usize, "kok"), __HI), (tinystr!(19usize, "ku"), __KU), (tinystr!(19usize, "ky"), __BE), (tinystr!(19usize, "lo"), __LO), (tinystr!(19usize, "mk"), __BE), (tinystr!(19usize, "ml"), __ML), (tinystr!(19usize, "mn"), __MN), (tinystr!(19usize, "mr"), __HI), (tinystr!(19usize, "my"), __MY), (tinystr!(19usize, "ne"), __NE), (tinystr!(19usize, "or"), __OR), (tinystr!(19usize, "pa"), __PA), (tinystr!(19usize, "ps"), __AR), (tinystr!(19usize, "ru"), __BE), (tinystr!(19usize, "si"), __SI), (tinystr!(19usize, "sr"), __BE), (tinystr!(19usize, "sr-Latn"), __AZ), (tinystr!(19usize, "ta"), __TA), (tinystr!(19usize, "te"), __TE), (tinystr!(19usize, "th"), __TH), (tinystr!(19usize, "ug"), __AR), (tinystr!(19usize, "uk"), __BE), (tinystr!(19usize, "ur"), __AR), (tinystr!(19usize, "yue-u-co-stroke"), __YUE_U_CO_STROKE), (tinystr!(19usize, "yue-u-co-unihan"), __YUE_U_CO_STROKE), (tinystr!(19usize, "yue-u-co-zhuyin"), __YUE_U_CO_STROKE), (tinystr!(19usize, "zh"), __ZH), (tinystr!(19usize, "zh-Hant-u-co-stroke"), __YUE_U_CO_STROKE), (tinystr!(19usize, "zh-Hant-u-co-unihan"), __YUE_U_CO_STROKE), (tinystr!(19usize, "zh-Hant-u-co-zhuyin"), __YUE_U_CO_STROKE), (tinystr!(19usize, "zh-u-co-stroke"), __YUE_U_CO_STROKE), (tinystr!(19usize, "zh-u-co-unihan"), __YUE_U_CO_STROKE), (tinystr!(19usize, "zh-u-co-zhuyin"), __YUE_U_CO_STROKE)]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thought: This will improve when we move -u-co-xxx
to DataMarkerAttributes... we haven't done that yet?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, this was part of #4984
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok but we are still planning to do it?
No I don't think I can |
Of course you can; take one of the example binaries built with compiled data, and measure the binary size before and after this commit. |
Please note that we include binary size benchmarks on the dashboard. They only get run on main. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unless this blocks something, I'd like to have measurements so we're not working in the dark.
If we merge it you can inspect all our binary size benchmarks. |
Running these commands:
On main:
On the branch, f2b687c:
Most are the same. The diffs:
|
Here is Why does this one regress in size? It only uses FixedDecimalFormatter. On the other hand, it's nice to see |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the only motivation for this change are the metrics, we should take a metrics-first mindset. I'm not blown away by the metrics I saw and would appreciate more investigation into them. I'm approving only because I have trust in your engineering judgement to decide whether this is the right change to land.
calendar/chinesecache@1, <singleton>, 754B, 134f5ec7c9bdc494 | ||
calendar/dangicache@1, <singleton>, 754B, 79d5d32449956411 | ||
calendar/islamicobservationalcache@1, <singleton>, 504B, 18af87d272c76f14 | ||
calendar/islamicummalquracache@1, <singleton>, 504B, ef6ac9ff14fd3f3 | ||
calendar/japanese@1, <singleton>, 111B, b31e52deaf52706f | ||
calendar/japanext@1, <singleton>, 5216B, 6c20e216c8cd6e41 | ||
datetime/week_data@1, <lookup>, 2760B, 115 identifiers | ||
datetime/week_data@1, <lookup>, 1626B, 115 identifiers |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Although these numbers are helpful, they are only proxy metrics. The real metric is the post-LTO binary size.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The real metrics are post-LTO binary sizes of actual programs, not our simple examples.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same conditional approval as above.
b09e28e
to
05664aa
Compare
Closing, as zerotrie numbers are a lot better |
&'static str
is 16 bytes (8 bytes on 32-bit), so unless a locale is longer than that,&'static str
is more expensive thanTinyAsciiStr
s, even without considering the pointed-to-data. We very rarely (never?) have locales that are actually this long.