Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify ZeroVec/VarZeroVec error handling, consolidate ULEError type #1389

Merged
merged 8 commits into from
Dec 14, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions components/datetime/src/fields/ule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::fields;
use zerovec::ule::{AsULE, ULE};
use zerovec::ule::{AsULE, ZeroVecError, ULE};

/// `FieldULE` is a type optimized for efficent storing and
/// deserialization of `DateTimeFormat` `Field` elements using
Expand Down Expand Up @@ -70,15 +70,15 @@ impl AsULE for fields::Field {
// 5 The other ULE methods use the default impl.
// 6. FieldULE byte equality is semantic equality.
unsafe impl ULE for FieldULE {
type Error = &'static str;

fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> {
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
let mut chunks = bytes.chunks_exact(2);

if !chunks.all(|c| fields::Field::bytes_in_range(&c[0], &c[1]))
|| !chunks.remainder().is_empty()
{
return Err("Invalid byte sequence");
if !chunks.all(|c| fields::Field::bytes_in_range(&c[0], &c[1])) {
return Err(ZeroVecError::parse::<Self>());
}

if !chunks.remainder().is_empty() {
return Err(ZeroVecError::length::<Self>(bytes.len()));
}
Ok(())
}
Expand Down
30 changes: 15 additions & 15 deletions components/datetime/src/pattern/item/ule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
use super::{GenericPatternItem, PatternItem};
use crate::fields;
use core::convert::TryFrom;
use zerovec::ule::{AsULE, ULE};
use zerovec::ule::{AsULE, ZeroVecError, ULE};

/// `PatternItemULE` is a type optimized for efficent storing and
/// deserialization of `DateTimeFormat` `PatternItem` elements using
Expand Down Expand Up @@ -93,15 +93,15 @@ impl PatternItemULE {
// 5. The other ULE methods use the default impl.
// 6. PatternItemULE byte equality is semantic equality.
unsafe impl ULE for PatternItemULE {
type Error = &'static str;

fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> {
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
let mut chunks = bytes.chunks_exact(3);

if !chunks.all(|c| Self::bytes_in_range((&c[0], &c[1], &c[2])))
|| !chunks.remainder().is_empty()
{
return Err("Invalid byte sequence");
if !chunks.all(|c| Self::bytes_in_range((&c[0], &c[1], &c[2]))) {
return Err(ZeroVecError::parse::<Self>());
}

if !chunks.remainder().is_empty() {
return Err(ZeroVecError::length::<Self>(bytes.len()));
}
Ok(())
}
Expand Down Expand Up @@ -228,15 +228,15 @@ impl GenericPatternItemULE {
// 5. The other ULE methods use the default impl.
// 6. GenericPatternItemULE byte equality is semantic equality.
unsafe impl ULE for GenericPatternItemULE {
type Error = &'static str;

fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> {
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
let mut chunks = bytes.chunks_exact(3);

if !chunks.all(|c| Self::bytes_in_range((&c[0], &c[1], &c[2])))
|| !chunks.remainder().is_empty()
{
return Err("Invalid byte sequence");
if !chunks.all(|c| Self::bytes_in_range((&c[0], &c[1], &c[2]))) {
return Err(ZeroVecError::parse::<Self>());
}

if !chunks.remainder().is_empty() {
return Err(ZeroVecError::length::<Self>(bytes.len()));
}
Ok(())
}
Expand Down
15 changes: 6 additions & 9 deletions components/plurals/src/rules/runtime/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use core::{
use icu_provider::yoke::{self, *};
use num_enum::{IntoPrimitive, TryFromPrimitive, UnsafeFromPrimitive};
use zerovec::{
ule::{custom::EncodeAsVarULE, AsULE, PairULE, PlainOldULE, VarULE, ULE},
ule::{custom::EncodeAsVarULE, AsULE, PairULE, PlainOldULE, VarULE, ZeroVecError, ULE},
{VarZeroVec, ZeroVec},
};

Expand Down Expand Up @@ -326,8 +326,8 @@ impl RelationULE {
}

#[inline]
fn validate_andor_polarity_operand(encoded: u8) -> Result<(), &'static str> {
Operand::try_from(encoded & 0b0011_1111).map_err(|_| "Failed to decode operand.")?;
fn validate_andor_polarity_operand(encoded: u8) -> Result<(), ZeroVecError> {
Operand::try_from(encoded & 0b0011_1111).map_err(|_| ZeroVecError::parse::<Self>())?;
Ok(())
}

Expand Down Expand Up @@ -361,8 +361,6 @@ impl RelationULE {
// 6. The other VarULE methods use the default impl.
// 7. RelationULE byte equality is semantic equality.
unsafe impl VarULE for RelationULE {
type Error = &'static str;

#[inline]
unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
let ptr = bytes.as_ptr();
Expand All @@ -387,15 +385,14 @@ unsafe impl VarULE for RelationULE {
}

#[inline]
fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> {
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
RelationULE::validate_andor_polarity_operand(bytes[0])?;
// Skip bytes 1-4 as they're always valid `u32` for `Modulo`.
if bytes.len() < 5 {
return Err("byte slice is too short");
return Err(ZeroVecError::parse::<Self>());
}
let remaining = &bytes[5..];
RangeOrValueULE::validate_byte_slice(remaining)
.map_err(|_| "Invalid list of RangeOrValueULE")?;
RangeOrValueULE::validate_byte_slice(remaining)?;
Ok(())
}
}
Expand Down
9 changes: 3 additions & 6 deletions components/properties/src/ule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ use crate::{
};

use core::convert::TryFrom;
use num_enum::TryFromPrimitiveError;
use zerovec::ule::{AsULE, PlainOldULE, ULE};
use zerovec::ule::{AsULE, PlainOldULE, ZeroVecError, ULE};

#[repr(transparent)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
Expand Down Expand Up @@ -42,12 +41,10 @@ impl AsULE for GeneralSubcategory {
// 5. The other ULE methods use the default impl.
// 6. The PartialEq implementation on GeneralSubcategory uses byte equality.
unsafe impl ULE for GeneralSubcategoryULE {
type Error = TryFromPrimitiveError<GeneralSubcategory>;

fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> {
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
// Validate the bytes
for b in bytes {
GeneralSubcategory::try_from(*b)?;
GeneralSubcategory::try_from(*b).map_err(|_| ZeroVecError::parse::<Self>())?;
}
Ok(())
}
Expand Down
8 changes: 2 additions & 6 deletions utils/codepointtrie/src/codepointtrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use core::num::TryFromIntError;
use serde::{Deserialize, Serialize};
use yoke::{Yokeable, ZeroCopyFrom};
use zerovec::ZeroVec;
use zerovec::ZeroVecError;

/// The type of trie represents whether the trie has an optimization that
/// would make it small or fast.
Expand Down Expand Up @@ -331,12 +332,7 @@ impl<'trie, T: TrieValue> CodePointTrie<'trie, T> {
/// let cpt2: CodePointTrie<u32> = cpt1.try_into_converted()
/// .expect("infallible");
/// ```
pub fn try_into_converted<P>(
self,
) -> Result<
CodePointTrie<'trie, P>,
<<P as zerovec::ule::AsULE>::ULE as zerovec::ule::ULE>::Error,
>
pub fn try_into_converted<P>(self) -> Result<CodePointTrie<'trie, P>, ZeroVecError>
where
P: TrieValue,
{
Expand Down
1 change: 0 additions & 1 deletion utils/zerovec/benches/zerovec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ fn get_needles_and_haystack() -> (Vec<u32>, Vec<u32>) {
fn vec_to_unaligned_uvec<'a, T>(vec: &Vec<T>, buffer: &'a mut AlignedBuffer) -> ZeroVec<'a, T>
where
T: EqULE + Copy + PartialEq + fmt::Debug,
<T::ULE as ULE>::Error: fmt::Debug,
{
// Pad with zero to ensure it is not aligned
buffer.0.push(0);
Expand Down
3 changes: 2 additions & 1 deletion utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,6 @@ pub mod zerovec;
mod yoke_impls;

pub use crate::map::ZeroMap;
pub use crate::varzerovec::{VarZeroVec, VarZeroVecError};
pub use crate::ule::ZeroVecError;
pub use crate::varzerovec::VarZeroVec;
pub use crate::zerovec::ZeroVec;
11 changes: 3 additions & 8 deletions utils/zerovec/src/ule/chars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,16 @@ pub struct CharULE([u8; 4]);
// 5. The other ULE methods use the default impl.
// 6. CharULE byte equality is semantic equality
unsafe impl ULE for CharULE {
type Error = ULEError<core::char::CharTryFromError>;

#[inline]
fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> {
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
if bytes.len() % 4 != 0 {
return Err(ULEError::InvalidLength {
ty: "char",
len: bytes.len(),
});
return Err(ZeroVecError::length::<Self>(bytes.len()));
}
// Validate the bytes
for chunk in bytes.chunks_exact(4) {
// TODO: Use slice::as_chunks() when stabilized
let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
char::try_from(u)?;
char::try_from(u).map_err(|_| ZeroVecError::parse::<Self>())?;
}
Ok(())
}
Expand Down
9 changes: 4 additions & 5 deletions utils/zerovec/src/ule/custom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,11 @@
//! // 6. The other VarULE methods use the default impl.
//! // 7. FooULE byte equality is semantic equality
//! unsafe impl VarULE for FooULE {
//! type Error = &'static str; // use strings for simplicity
//! fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> {
//! fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
//! // validate each field
//! <char as AsULE>::ULE::validate_byte_slice(&bytes[0..4]).map_err(|_| "validating char failed")?;
//! <char as AsULE>::ULE::validate_byte_slice(&bytes[4..8]).map_err(|_| "validating u32 failed")?;
//! let _ = ZeroVec::<u32>::parse_byte_slice(&bytes[8..]).map_err(|_| "validating ZeroVec failed")?;
//! <char as AsULE>::ULE::validate_byte_slice(&bytes[0..4]).map_err(|_| ZeroVecError::parse::<Self>())?;
//! <char as AsULE>::ULE::validate_byte_slice(&bytes[4..8]).map_err(|_| ZeroVecError::parse::<Self>())?;
//! let _ = ZeroVec::<u32>::parse_byte_slice(&bytes[8..]).map_err(|_| ZeroVecError::parse::<Self>())?;
//! Ok(())
//! }
//! unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
Expand Down
40 changes: 31 additions & 9 deletions utils/zerovec/src/ule/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,53 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::any;
use core::fmt;

/// A generic error type to be used for decoding slices of ULE types
#[derive(Copy, Clone, Debug)]
pub enum ULEError<E> {
pub enum ZeroVecError {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: If you name this ZeroVecError, please move it up to the top level instead of nesting it under ule

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. I continued reexporting it from ule for the convenience of glob imports

/// Attempted to parse a buffer into a slice of the given ULE type but its
/// length was not compatible
InvalidLength { ty: &'static str, len: usize },
ParseError(E),
/// The byte sequence provided for `ty` failed to parse correctly
ParseError { ty: &'static str },
/// The byte buffer was not in the appropriate format for VarZeroVec
VZVFormatError,
Manishearth marked this conversation as resolved.
Show resolved Hide resolved
}

impl<E: fmt::Display> fmt::Display for ULEError<E> {
impl fmt::Display for ZeroVecError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
match *self {
ULEError::InvalidLength { ty, len } => {
ZeroVecError::InvalidLength { ty, len } => {
write!(f, "Invalid length {} for slice of type {}", len, ty)
}
ULEError::ParseError(ref e) => e.fmt(f),
ZeroVecError::ParseError { ty } => {
write!(f, "Could not parse bytes to slice of type {}", ty)
}
ZeroVecError::VZVFormatError => {
write!(f, "Invalid format for VarZeroVec buffer")
}
}
}
}

impl<E> From<E> for ULEError<E> {
fn from(e: E) -> Self {
ULEError::ParseError(e)
impl ZeroVecError {
/// Construct a parse error for the given type
pub fn parse<T: ?Sized + 'static>() -> ZeroVecError {
ZeroVecError::ParseError {
ty: any::type_name::<T>(),
}
}

/// Construct an "invalid length" error for the given type and length
pub fn length<T: ?Sized + 'static>(len: usize) -> ZeroVecError {
ZeroVecError::InvalidLength {
ty: any::type_name::<T>(),
len,
}
}
}

#[cfg(feature = "std")]
impl<E: fmt::Display + fmt::Debug> ::std::error::Error for ULEError<E> {}
impl ::std::error::Error for ZeroVecError {}
20 changes: 7 additions & 13 deletions utils/zerovec/src/ule/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ mod plain;
mod slices;

pub use chars::CharULE;
pub use error::ULEError;
pub use pair::{PairULE, PairULEError};
pub use error::ZeroVecError;
pub use pair::PairULE;
pub use plain::PlainOldULE;

use alloc::alloc::Layout;
use alloc::borrow::ToOwned;
use alloc::boxed::Box;
use core::{fmt, mem, slice};
use core::{mem, slice};

/// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice.
///
Expand Down Expand Up @@ -63,15 +63,12 @@ where
Self: Sized,
Self: Copy + 'static,
{
/// The error that occurs if a byte array is not valid for this ULE.
type Error: fmt::Display;

/// Validates a byte slice, `&[u8]`.
///
/// If `Self` is not well-defined for all possible bit values, the bytes should be validated.
/// If the bytes can be transmuted, *in their entirety*, to a valid slice of `Self`, then `Ok`
/// should be returned; otherwise, `Self::Error` should be returned.
fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error>;
fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError>;

/// Parses a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime.
///
Expand All @@ -83,7 +80,7 @@ where
///
/// Note: The following equality should hold: `bytes.len() % size_of::<Self>() == 0`. This
/// means that the returned slice can span the entire byte slice.
fn parse_byte_slice(bytes: &[u8]) -> Result<&[Self], Self::Error> {
fn parse_byte_slice(bytes: &[u8]) -> Result<&[Self], ZeroVecError> {
Self::validate_byte_slice(bytes)?;
debug_assert_eq!(bytes.len() % mem::size_of::<Self>(), 0);
Ok(unsafe { Self::from_byte_slice_unchecked(bytes) })
Expand Down Expand Up @@ -263,15 +260,12 @@ where
/// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may
/// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`.
pub unsafe trait VarULE: 'static {
/// The error that occurs if a byte array is not valid for this ULE.
type Error: fmt::Display;

/// Validates a byte slice, `&[u8]`.
///
/// If `Self` is not well-defined for all possible bit values, the bytes should be validated.
/// If the bytes can be transmuted, *in their entirety*, to a valid `&Self`, then `Ok` should
/// be returned; otherwise, `Self::Error` should be returned.
fn validate_byte_slice(_bytes: &[u8]) -> Result<(), Self::Error>;
fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError>;

/// Parses a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime.
///
Expand All @@ -284,7 +278,7 @@ pub unsafe trait VarULE: 'static {
/// Note: The following equality should hold: `size_of_val(result) == size_of_val(bytes)`,
/// where `result` is the successful return value of the method. This means that the return
/// value spans the entire byte slice.
fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, Self::Error> {
fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
Self::validate_byte_slice(bytes)?;
let result = unsafe { Self::from_byte_slice_unchecked(bytes) };
debug_assert_eq!(mem::size_of_val(result), mem::size_of_val(bytes));
Expand Down
Loading