Skip to content

Commit

Permalink
Handle incorrect VZV bytes representation (#3883)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian authored Aug 17, 2023
1 parent d93f901 commit 2f806bc
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 96 deletions.
166 changes: 83 additions & 83 deletions provider/datagen/tests/data/postcard/fingerprints.csv
Original file line number Diff line number Diff line change
Expand Up @@ -1253,7 +1253,7 @@ list/unit@1, sr-Latn, 23B, 58b0dcc30cdabe1d
list/unit@1, th, 30B, 3634af6f27539959
list/unit@1, tr, 16B, 375096409d6f0946
list/unit@1, und, 17B, 924e2d2be2336604
locid_transform/aliases@1, und, 8108B, 910e28d57c7c9911
locid_transform/aliases@1, und, 8104B, 3ad5e40d558cf536
locid_transform/likelysubtags@1, und, 4691B, d7ae441bbc9e1b75
locid_transform/likelysubtags_ext@1, und, 75692B, d502bcb0acd0e4e4
locid_transform/likelysubtags_l@1, und, 1810B, e172457d83d68447
Expand Down Expand Up @@ -1397,96 +1397,96 @@ props/casemap@1, und, 22431B, e5f6c1c194a5002f
props/casemap_unfold@1, und, 932B, 85d63de2fdea5a3d
props/ccc@1, und, 5270B, 85887761e8c0b554
props/ea@1, und, 4939B, de9bdf3107a1a963
props/exemplarchars/auxiliary@1, ar, 103B, 81ba9ba73c59c4c4
props/exemplarchars/auxiliary@1, ar-EG, 103B, 81ba9ba73c59c4c4
props/exemplarchars/auxiliary@1, bn, 23B, 5030e834721a31d
props/exemplarchars/auxiliary@1, ccp, 7B, a7ee17bdde05404c
props/exemplarchars/auxiliary@1, en, 136B, ca9fa2472e43d8db
props/exemplarchars/auxiliary@1, en-001, 136B, ca9fa2472e43d8db
props/exemplarchars/auxiliary@1, en-ZA, 192B, ab071c63635d6ef6
props/exemplarchars/auxiliary@1, es, 200B, 3e5ad25e4b57ebd1
props/exemplarchars/auxiliary@1, es-AR, 200B, 3e5ad25e4b57ebd1
props/exemplarchars/auxiliary@1, fil, 47B, 1e59c3e095a11ff4
props/exemplarchars/auxiliary@1, fr, 144B, 8f01707e31ae9b77
props/exemplarchars/auxiliary@1, ja, 608B, 5cda0ba025ae5157
props/exemplarchars/auxiliary@1, ar, 99B, 6558940e4709505f
props/exemplarchars/auxiliary@1, ar-EG, 99B, 6558940e4709505f
props/exemplarchars/auxiliary@1, bn, 19B, c240e2a137c88e
props/exemplarchars/auxiliary@1, ccp, 3B, c76b19d07c9fe625
props/exemplarchars/auxiliary@1, en, 132B, ca4c480e9d2cd1ec
props/exemplarchars/auxiliary@1, en-001, 132B, ca4c480e9d2cd1ec
props/exemplarchars/auxiliary@1, en-ZA, 188B, 60246eda9d6070c9
props/exemplarchars/auxiliary@1, es, 196B, efe819d83875f9d6
props/exemplarchars/auxiliary@1, es-AR, 196B, efe819d83875f9d6
props/exemplarchars/auxiliary@1, fil, 43B, 683b36475b8b233c
props/exemplarchars/auxiliary@1, fr, 140B, 80654f8db9f31544
props/exemplarchars/auxiliary@1, ja, 604B, de28efe64dd267ca
props/exemplarchars/auxiliary@1, ru, 61B, 6a20fa4f2a8172f3
props/exemplarchars/auxiliary@1, sr, 37B, e291a541ad2ca8fd
props/exemplarchars/auxiliary@1, sr-Latn, 31B, 60f5752fc7094742
props/exemplarchars/auxiliary@1, th, 15B, 5d3232640b4d876a
props/exemplarchars/auxiliary@1, tr, 152B, 54b1b627d8fe7736
props/exemplarchars/auxiliary@1, und, 7B, a7ee17bdde05404c
props/exemplarchars/index@1, ar, 39B, dd0c9bdcb63dc97b
props/exemplarchars/index@1, ar-EG, 39B, dd0c9bdcb63dc97b
props/exemplarchars/auxiliary@1, sr-Latn, 27B, b96890025602e911
props/exemplarchars/auxiliary@1, th, 11B, deddb822a77299fc
props/exemplarchars/auxiliary@1, tr, 148B, 51731dc6dd6ada64
props/exemplarchars/auxiliary@1, und, 3B, c76b19d07c9fe625
props/exemplarchars/index@1, ar, 35B, 1e36db71576dd57a
props/exemplarchars/index@1, ar-EG, 35B, 1e36db71576dd57a
props/exemplarchars/index@1, bn, 66B, d11f0e0b22169f5b
props/exemplarchars/index@1, ccp, 15B, d350751af6bbc3d3
props/exemplarchars/index@1, en, 15B, 757e29ef1029b9f2
props/exemplarchars/index@1, en-001, 15B, 757e29ef1029b9f2
props/exemplarchars/index@1, en-ZA, 15B, 757e29ef1029b9f2
props/exemplarchars/index@1, es, 23B, d2c341d54a78da7
props/exemplarchars/index@1, es-AR, 23B, d2c341d54a78da7
props/exemplarchars/index@1, ccp, 11B, 3479553a04586974
props/exemplarchars/index@1, en, 11B, c5ed740dfb27db94
props/exemplarchars/index@1, en-001, 11B, c5ed740dfb27db94
props/exemplarchars/index@1, en-ZA, 11B, c5ed740dfb27db94
props/exemplarchars/index@1, es, 19B, d390b154318cf4c5
props/exemplarchars/index@1, es-AR, 19B, d390b154318cf4c5
props/exemplarchars/index@1, fil, 27B, c6994dd550d5b4e2
props/exemplarchars/index@1, fr, 15B, 757e29ef1029b9f2
props/exemplarchars/index@1, ja, 87B, 1fe8d5166096a8f
props/exemplarchars/index@1, ru, 39B, b216a14c26e08b41
props/exemplarchars/index@1, sr, 39B, 1f34a0b2c4e8c9cf
props/exemplarchars/index@1, fr, 11B, c5ed740dfb27db94
props/exemplarchars/index@1, ja, 83B, 6c0d3e7fc7b1cdcc
props/exemplarchars/index@1, ru, 35B, 638c1c9980e0bd90
props/exemplarchars/index@1, sr, 35B, 178072467638bd1b
props/exemplarchars/index@1, sr-Latn, 60B, 6e724686cc05703e
props/exemplarchars/index@1, th, 15B, 9a73c145265a2ae5
props/exemplarchars/index@1, tr, 55B, becd57ac0c1a82ed
props/exemplarchars/index@1, und, 7B, a7ee17bdde05404c
props/exemplarchars/main@1, ar, 31B, 4fa3539c523a0dc5
props/exemplarchars/main@1, ar-EG, 31B, 4fa3539c523a0dc5
props/exemplarchars/index@1, th, 11B, 146fc32e7f67f2c8
props/exemplarchars/index@1, tr, 51B, 121515d5ff38f62b
props/exemplarchars/index@1, und, 3B, c76b19d07c9fe625
props/exemplarchars/main@1, ar, 27B, c08afa9f1c17fc4c
props/exemplarchars/main@1, ar-EG, 27B, c08afa9f1c17fc4c
props/exemplarchars/main@1, bn, 146B, 31d8ce207d73155
props/exemplarchars/main@1, ccp, 15B, e191e70902e7f035
props/exemplarchars/main@1, en, 15B, a7d71792c12bfffa
props/exemplarchars/main@1, en-001, 15B, a7d71792c12bfffa
props/exemplarchars/main@1, en-ZA, 15B, a7d71792c12bfffa
props/exemplarchars/main@1, es, 71B, fa77ca1cfdf9230c
props/exemplarchars/main@1, es-AR, 71B, fa77ca1cfdf9230c
props/exemplarchars/main@1, ccp, 11B, 4ca535c7c5505492
props/exemplarchars/main@1, en, 11B, fdebd22f9ff8a46b
props/exemplarchars/main@1, en-001, 11B, fdebd22f9ff8a46b
props/exemplarchars/main@1, en-ZA, 11B, fdebd22f9ff8a46b
props/exemplarchars/main@1, es, 67B, 36546e1a555281da
props/exemplarchars/main@1, es-AR, 67B, 36546e1a555281da
props/exemplarchars/main@1, fil, 27B, 5aa0ee9120937a2e
props/exemplarchars/main@1, fr, 87B, 978fe355ffd56d69
props/exemplarchars/main@1, ja, 13960B, ecdb0541bf50467a
props/exemplarchars/main@1, ru, 23B, 776f46a709a45c84
props/exemplarchars/main@1, sr, 47B, 6dd4749272778c73
props/exemplarchars/main@1, fr, 83B, e8c49e64b7de061f
props/exemplarchars/main@1, ja, 13956B, 71baba690436f06b
props/exemplarchars/main@1, ru, 19B, 826b7a0baf4968c7
props/exemplarchars/main@1, sr, 43B, 543c615d18604069
props/exemplarchars/main@1, sr-Latn, 84B, b3210a1fdbb25658
props/exemplarchars/main@1, th, 23B, dc264c756b6c70c5
props/exemplarchars/main@1, tr, 79B, 2a46aadb009b9040
props/exemplarchars/main@1, und, 7B, a7ee17bdde05404c
props/exemplarchars/numbers@1, ar, 79B, 45e0d61dd613fbb9
props/exemplarchars/numbers@1, ar-EG, 79B, 45e0d61dd613fbb9
props/exemplarchars/numbers@1, bn, 55B, 5dbd24383df14b12
props/exemplarchars/numbers@1, ccp, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, en, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, en-001, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, en-ZA, 47B, e8216b1455c6fe34
props/exemplarchars/numbers@1, es, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, es-AR, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, fil, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, fr, 87B, 4665b771b63788a9
props/exemplarchars/numbers@1, ja, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, ru, 47B, e8216b1455c6fe34
props/exemplarchars/numbers@1, sr, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, sr-Latn, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, th, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, tr, 47B, e1b5384c071ba661
props/exemplarchars/numbers@1, und, 47B, e1b5384c071ba661
props/exemplarchars/punctuation@1, ar, 119B, ed76591cd7577ded
props/exemplarchars/punctuation@1, ar-EG, 119B, ed76591cd7577ded
props/exemplarchars/punctuation@1, bn, 127B, fcc476bc9071298f
props/exemplarchars/punctuation@1, ccp, 136B, c611097cf97ab4b0
props/exemplarchars/punctuation@1, en, 127B, fcc476bc9071298f
props/exemplarchars/punctuation@1, en-001, 127B, fcc476bc9071298f
props/exemplarchars/punctuation@1, en-ZA, 127B, fcc476bc9071298f
props/exemplarchars/punctuation@1, es, 152B, 75f0f7cbd4257ed2
props/exemplarchars/punctuation@1, es-AR, 152B, 75f0f7cbd4257ed2
props/exemplarchars/punctuation@1, fil, 119B, cc9ce6072abce056
props/exemplarchars/punctuation@1, fr, 144B, ec3639703b696076
props/exemplarchars/punctuation@1, ja, 312B, c7c2f5c2e2a238e5
props/exemplarchars/punctuation@1, ru, 160B, f08f7f99e3254699
props/exemplarchars/punctuation@1, sr, 144B, 82109d7f0d039c80
props/exemplarchars/punctuation@1, sr-Latn, 144B, 82109d7f0d039c80
props/exemplarchars/punctuation@1, th, 111B, 3054746777f45c15
props/exemplarchars/punctuation@1, tr, 127B, fcc476bc9071298f
props/exemplarchars/punctuation@1, und, 87B, 1eb15399ab2a5d3b
props/exemplarchars/main@1, th, 19B, fd3463d155825715
props/exemplarchars/main@1, tr, 75B, f83818046ea9136c
props/exemplarchars/main@1, und, 3B, c76b19d07c9fe625
props/exemplarchars/numbers@1, ar, 75B, 6f6ed88ed26911fe
props/exemplarchars/numbers@1, ar-EG, 75B, 6f6ed88ed26911fe
props/exemplarchars/numbers@1, bn, 51B, b1a24d63a91986e5
props/exemplarchars/numbers@1, ccp, 43B, 6150146944dccad
props/exemplarchars/numbers@1, en, 43B, 6150146944dccad
props/exemplarchars/numbers@1, en-001, 43B, 6150146944dccad
props/exemplarchars/numbers@1, en-ZA, 43B, bdb5a5fd3e342c04
props/exemplarchars/numbers@1, es, 43B, 6150146944dccad
props/exemplarchars/numbers@1, es-AR, 43B, 6150146944dccad
props/exemplarchars/numbers@1, fil, 43B, 6150146944dccad
props/exemplarchars/numbers@1, fr, 83B, 46c1863087d7f583
props/exemplarchars/numbers@1, ja, 43B, 6150146944dccad
props/exemplarchars/numbers@1, ru, 43B, bdb5a5fd3e342c04
props/exemplarchars/numbers@1, sr, 43B, 6150146944dccad
props/exemplarchars/numbers@1, sr-Latn, 43B, 6150146944dccad
props/exemplarchars/numbers@1, th, 43B, 6150146944dccad
props/exemplarchars/numbers@1, tr, 43B, 6150146944dccad
props/exemplarchars/numbers@1, und, 43B, 6150146944dccad
props/exemplarchars/punctuation@1, ar, 115B, 849a586a4478edb6
props/exemplarchars/punctuation@1, ar-EG, 115B, 849a586a4478edb6
props/exemplarchars/punctuation@1, bn, 123B, 2e7906785ab47589
props/exemplarchars/punctuation@1, ccp, 132B, c3c0f02949da230d
props/exemplarchars/punctuation@1, en, 123B, 2e7906785ab47589
props/exemplarchars/punctuation@1, en-001, 123B, 2e7906785ab47589
props/exemplarchars/punctuation@1, en-ZA, 123B, 2e7906785ab47589
props/exemplarchars/punctuation@1, es, 148B, 6fc36862866bc1
props/exemplarchars/punctuation@1, es-AR, 148B, 6fc36862866bc1
props/exemplarchars/punctuation@1, fil, 115B, bcd01ba9a6f9afd2
props/exemplarchars/punctuation@1, fr, 140B, c93fbe47124d89f7
props/exemplarchars/punctuation@1, ja, 308B, 26f8e2db987a5e5c
props/exemplarchars/punctuation@1, ru, 156B, 4053f13c5d2e0ce1
props/exemplarchars/punctuation@1, sr, 140B, face2a479b6ba77f
props/exemplarchars/punctuation@1, sr-Latn, 140B, face2a479b6ba77f
props/exemplarchars/punctuation@1, th, 107B, 365776b3531212fd
props/exemplarchars/punctuation@1, tr, 123B, 2e7906785ab47589
props/exemplarchars/punctuation@1, und, 83B, 65bb6a9f6c28188f
props/gc@1, und, 16984B, 23f5131c2f0afb5d
props/graph@1, und, 5699B, 42fbc9da34d13b06
props/lb@1, und, 14640B, a43615cc519e775
Expand Down
5 changes: 4 additions & 1 deletion utils/zerovec/src/varzerovec/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
/// `things`, such that it parses to a `T::VarULE`
#[inline]
pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> {
// The empty VZV is special-cased to the empty slice
if slice.is_empty() {
return Ok(VarZeroVecComponents {
len: 0,
Expand Down Expand Up @@ -219,6 +220,7 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
/// The bytes must have previously successfully run through
/// [`VarZeroVecComponents::parse_byte_slice()`]
pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self {
// The empty VZV is special-cased to the empty slice
if slice.is_empty() {
return VarZeroVecComponents {
len: 0,
Expand Down Expand Up @@ -485,12 +487,13 @@ where
}

/// Collects the bytes for a VarZeroSlice into a Vec.
pub fn get_serializable_bytes<T, A, F>(elements: &[A]) -> Option<Vec<u8>>
pub fn get_serializable_bytes_non_empty<T, A, F>(elements: &[A]) -> Option<Vec<u8>>
where
T: VarULE + ?Sized,
A: EncodeAsVarULE<T>,
F: VarZeroVecFormat,
{
debug_assert!(!elements.is_empty());
let len = compute_serializable_len::<T, A, F>(elements)?;
debug_assert!(len >= LENGTH_WIDTH as u32);
let mut output: Vec<u8> = alloc::vec![0; len as usize];
Expand Down
17 changes: 11 additions & 6 deletions utils/zerovec/src/varzerovec/owned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,18 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecOwned<T, F> {
where
A: EncodeAsVarULE<T>,
{
Ok(Self {
marker: PhantomData,
// TODO(#1410): Rethink length errors in VZV.
entire_slice: components::get_serializable_bytes::<T, A, F>(elements).ok_or(
"Attempted to build VarZeroVec out of elements that \
Ok(if elements.is_empty() {
Self::from_slice(VarZeroSlice::new_empty())
} else {
Self {
marker: PhantomData,
// TODO(#1410): Rethink length errors in VZV.
entire_slice: components::get_serializable_bytes_non_empty::<T, A, F>(elements)
.ok_or(
"Attempted to build VarZeroVec out of elements that \
cumulatively are larger than a u32 in size",
)?,
)?,
}
})
}

Expand Down
4 changes: 2 additions & 2 deletions utils/zerovec/src/varzerovec/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ pub struct VarZeroSlice<T: ?Sized, F = Index16> {
impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
/// Construct a new empty VarZeroSlice
pub const fn new_empty() -> &'static Self {
let arr: &[u8] = &[];
unsafe { mem::transmute(arr) }
// The empty VZV is special-cased to the empty slice
unsafe { mem::transmute(&[] as &[u8]) }
}

/// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
Expand Down
34 changes: 30 additions & 4 deletions utils/zerovec/src/varzerovec/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,12 @@ where
{
#[inline]
fn from(elements: &[A]) -> Self {
#[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
VarZeroVecOwned::try_from_elements(elements).unwrap().into()
if elements.is_empty() {
VarZeroSlice::new_empty().into()
} else {
#[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
VarZeroVecOwned::try_from_elements(elements).unwrap().into()
}
}
}

Expand All @@ -451,8 +455,14 @@ where
{
#[inline]
fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool {
    // An empty vector can be backed by more than one byte sequence
    // (`VZV::from_elements` used to emit a non-canonical encoding for it),
    // so emptiness is decided first and raw bytes are only compared when
    // both operands hold at least one element.
    match (self.is_empty(), other.is_empty()) {
        // VarULE guarantees that byte equality is semantic equality, and a
        // non-empty VZV has exactly one metadata representation, so the
        // guarantee carries over to the complete buffer.
        (false, false) => self.as_bytes() == other.as_bytes(),
        // At least one side is empty: equal only when both sides are.
        (lhs_empty, rhs_empty) => lhs_empty && rhs_empty,
    }
}
}
Expand Down Expand Up @@ -503,3 +513,19 @@ impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T
self.iter().cmp(other.iter())
}
}

/// An empty vector built with `new()` and one converted from an empty
/// element slice must expose exactly the same bytes.
#[test]
fn assert_single_empty_representation() {
    let no_elements: &[&str] = &[];

    let via_new = VarZeroVec::<str>::new();
    let via_from = VarZeroVec::<str>::from(no_elements);

    assert_eq!(via_new.as_bytes(), via_from.as_bytes());
}

/// A buffer of four zero bytes and a zero-length buffer must both parse
/// successfully and compare equal as vectors, even though their bytes differ.
#[test]
fn weird_empty_representation_equality() {
    let four_zero_bytes: &[u8] = &[0, 0, 0, 0];
    let no_bytes: &[u8] = &[];

    let from_zero_bytes = VarZeroVec::<str>::parse_byte_slice(four_zero_bytes).unwrap();
    let from_no_bytes = VarZeroVec::<str>::parse_byte_slice(no_bytes).unwrap();

    assert_eq!(from_zero_bytes, from_no_bytes);
}

0 comments on commit 2f806bc

Please sign in to comment.