diff --git a/CHANGELOG.md b/CHANGELOG.md index aca6bf462..fae0f65db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## vNext + +- *BREAKING CHANGE*: `is_u8` method is removed from the public API. + Consumers of the API don't need to call or override it, and doing + so might cause undefined behavior. + ## 0.9.0 - *BREAKING CHANGE*: `is_u8` optimization helper is now unsafe since it may cause undefined behavior if it returns `true` for the type that is not safe @@ -28,4 +34,3 @@ - Added support for `std::borrow::Cow` - Avoid silent integer casts since they can lead to hidden security issues. - Removed `Cargo.lock` as it is advised for lib crates. - diff --git a/borsh/src/de/mod.rs b/borsh/src/de/mod.rs index f37a47469..0848360cc 100644 --- a/borsh/src/de/mod.rs +++ b/borsh/src/de/mod.rs @@ -36,21 +36,41 @@ pub trait BorshDeserialize: Sized { Ok(result) } - /// Whether Self is u8. - /// NOTE: `Vec` is the most common use-case for serialization and deserialization, it's - /// worth handling it as a special case to improve performance. - /// It's a workaround for specific `Vec` implementation versus generic `Vec` - /// implementation. See https://github.com/rust-lang/rfcs/pull/1210 for details. - /// - /// It's marked unsafe, because if the type is not `u8` it leads to UB. See related issues: - /// - https://github.com/near/borsh-rs/issues/17 - /// - https://github.com/near/borsh-rs/issues/18 + /// Hidden private implementation detail, overriding this method is unsound. + // NOTE: `Vec` is the most common use-case for serialization and deserialization, + // it's worth handling it as a special case to improve performance. To do this properly, + // we need [specialization](https://github.com/rust-lang/rfcs/pull/1210), which is not + // currently available. + // + // As a work-around, we specialize manually. We need to ask a generic `T` if it is, + // in fact, an `u8`. We rely on the answer being correct for soundness, so overriding + // this method is unsafe. Ideally, we'd have an unsafe trait like + // + // ``` + // unsafe trait IsU8 { const YES: bool = false; } + // ``` + // + // Alas, that also needs specialization. So we just hide the method from crate's public + // API altogether. + // + // See https://github.com/near/borsh-rs/issues/24. #[inline] - unsafe fn is_u8() -> bool { + #[doc(hidden)] + fn __dont_override_me_is_u8() -> bool { false } } +#[inline] +fn is_u8() -> bool { + let res = T::__dont_override_me_is_u8(); + debug_assert!( + !res || size_of::() == size_of::(), + "Do not override `__dont_override_me_is_u8`, it is unsound" + ); + res +} + impl BorshDeserialize for u8 { #[inline] fn deserialize(buf: &mut &[u8]) -> Result { @@ -66,7 +86,10 @@ impl BorshDeserialize for u8 { } #[inline] - unsafe fn is_u8() -> bool { + #[doc(hidden)] + fn __dont_override_me_is_u8() -> bool { + // SAFETY: this is technically an `unsafe { }` block, + // see the comment on the trait method for details. true } } @@ -246,7 +269,7 @@ where let len = u32::deserialize(buf)?; if len == 0 { Ok(Vec::new()) - } else if unsafe { T::is_u8() } && size_of::() == size_of::() { + } else if is_u8::() { let len = len.try_into().map_err(|_| ErrorKind::InvalidInput)?; if buf.len() < len { return Err(Error::new( @@ -493,7 +516,7 @@ macro_rules! impl_arrays { #[inline] fn deserialize(buf: &mut &[u8]) -> Result { let mut result = [T::default(); $len]; - if unsafe { T::is_u8() } && size_of::() == size_of::() { + if is_u8::() { if buf.len() < $len { return Err(Error::new( ErrorKind::InvalidInput, diff --git a/borsh/src/ser/mod.rs b/borsh/src/ser/mod.rs index f87d183a0..93e2b5041 100644 --- a/borsh/src/ser/mod.rs +++ b/borsh/src/ser/mod.rs @@ -24,21 +24,42 @@ pub trait BorshSerialize { Ok(result) } - /// Whether Self is u8. - /// NOTE: `Vec` is the most common use-case for serialization and deserialization, it's - /// worth handling it as a special case to improve performance. - /// It's a workaround for specific `Vec` implementation versus generic `Vec` - /// implementation. See https://github.com/rust-lang/rfcs/pull/1210 for details. - /// - /// It's marked unsafe, because if the type is not `u8` it leads to UB. See related issues: - /// - https://github.com/near/borsh-rs/issues/17 - /// - https://github.com/near/borsh-rs/issues/18 + /// Hidden private implementation detail, overriding this method is unsound. + // NOTE: `Vec` is the most common use-case for serialization and deserialization, + // it's worth handling it as a special case to improve performance. To do this properly, + // we need [specialization](https://github.com/rust-lang/rfcs/pull/1210), which is not + // currently available. + // + // As a work-around, we specialize manually. We need to ask a generic `T` if it is, + // in fact, an `u8`. We rely on the answer being correct for soundness, so overriding + // this method is unsafe. Ideally, we'd have an unsafe trait like + // + // ``` + // unsafe trait IsU8 { const YES: bool = false; } + // ``` + // + // Alas, that also needs specialization. So we just hide the method from crate's public + // API altogether. + // + // See https://github.com/near/borsh-rs/issues/24. #[inline] - unsafe fn is_u8() -> bool { + #[doc(hidden)] + fn __dont_override_me_is_u8() -> bool { false } } +#[inline] +fn is_u8() -> bool { + let res = T::__dont_override_me_is_u8(); + debug_assert!( + // Can't check `TypeId`, as that requires `: 'static` + !res || size_of::() == size_of::(), + "Do not override `__dont_override_me_is_u8`, it is unsound" + ); + res +} + impl BorshSerialize for u8 { #[inline] fn serialize(&self, writer: &mut W) -> Result<()> { @@ -46,7 +67,10 @@ impl BorshSerialize for u8 { } #[inline] - unsafe fn is_u8() -> bool { + #[doc(hidden)] + fn __dont_override_me_is_u8() -> bool { + // SAFETY: this is technically an `unsafe { }` block, + // see the comment on the trait method for details. true } } @@ -159,7 +183,7 @@ impl BorshSerialize for String { /// Helper method that is used to serialize a slice of data (without the length marker). #[inline] fn serialize_slice(data: &[T], writer: &mut W) -> Result<()> { - if unsafe { T::is_u8() } && size_of::() == size_of::() { + if is_u8::() { // The code below uses unsafe memory representation from `&[T]` to `&[u8]`. // The size of the memory should match because `size_of::() == size_of::()`. // @@ -408,7 +432,7 @@ macro_rules! impl_arrays { { #[inline] fn serialize(&self, writer: &mut W) -> Result<()> { - if unsafe { T::is_u8() } && size_of::() == size_of::() { + if is_u8::() { // The code below uses unsafe memory representation from `&[T]` to `&[u8]`. // The size of the memory should match because `size_of::() == size_of::()`. //