From 7e018999952ef6339f00617194749caa444e1b2d Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Sat, 14 Dec 2024 21:29:10 -0800 Subject: [PATCH 1/5] digest internals: Clarify how `BlockContext::finish` buffer is used. Although the buffer does initially contain the pending data, `pending` is a confusing name for it. --- src/digest.rs | 27 ++++++++++++++++----------- src/hmac.rs | 8 ++++---- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/digest.rs b/src/digest.rs index c81e6aee94..2e8b2ba0e7 100644 --- a/src/digest.rs +++ b/src/digest.rs @@ -69,31 +69,36 @@ impl BlockContext { leftover } + // On input, `block[..num_pending]` is the (possibly-empty) last *partial* + // chunk of input. It *must* be partial; that is, it is required that + // `num_pending < self.algorithm.block_len`. + // + // `block` may be arbitrarily overwritten. pub(crate) fn finish( mut self, - pending: &mut [u8], + block: &mut [u8], num_pending: usize, cpu_features: cpu::Features, ) -> Digest { let block_len = self.algorithm.block_len(); - assert_eq!(pending.len(), block_len); - assert!(num_pending < pending.len()); - let pending = &mut pending[..block_len]; + assert_eq!(block.len(), block_len); + assert!(num_pending < block.len()); + let block = &mut block[..block_len]; let mut padding_pos = num_pending; - pending[padding_pos] = 0x80; + block[padding_pos] = 0x80; padding_pos += 1; - if padding_pos > pending.len() - self.algorithm.len_len { - pending[padding_pos..].fill(0); - let (completed_bytes, leftover) = self.block_data_order(pending, cpu_features); + if padding_pos > block.len() - self.algorithm.len_len { + block[padding_pos..].fill(0); + let (completed_bytes, leftover) = self.block_data_order(block, cpu_features); debug_assert_eq!((completed_bytes, leftover.len()), (block_len, 0)); // We don't increase |self.completed_bytes| because the padding // isn't data, and so it isn't included in the data length. padding_pos = 0; } - pending[padding_pos..(block_len - 8)].fill(0); + block[padding_pos..(block_len - 8)].fill(0); // Output the length, in bits, in big endian order. let completed_bytes = self @@ -101,9 +106,9 @@ impl BlockContext { .checked_add(polyfill::u64_from_usize(num_pending)) .unwrap(); let copmleted_bits = BitLength::from_byte_len(completed_bytes).unwrap(); - pending[(block_len - 8)..].copy_from_slice(&copmleted_bits.to_be_bytes()); + block[(block_len - 8)..].copy_from_slice(&copmleted_bits.to_be_bytes()); - let (completed_bytes, leftover) = self.block_data_order(pending, cpu_features); + let (completed_bytes, leftover) = self.block_data_order(block, cpu_features); debug_assert_eq!((completed_bytes, leftover.len()), (block_len, 0)); Digest { diff --git a/src/hmac.rs b/src/hmac.rs index 34984d62aa..0e8bc5229c 100644 --- a/src/hmac.rs +++ b/src/hmac.rs @@ -315,11 +315,11 @@ impl Context { let cpu_features = cpu::features(); let algorithm = self.inner.algorithm(); - let mut pending = [0u8; digest::MAX_BLOCK_LEN]; - let pending = &mut pending[..algorithm.block_len()]; + let mut buffer = [0u8; digest::MAX_BLOCK_LEN]; + let buffer = &mut buffer[..algorithm.block_len()]; let num_pending = algorithm.output_len(); - pending[..num_pending].copy_from_slice(self.inner.finish().as_ref()); - Tag(self.outer.finish(pending, num_pending, cpu_features)) + buffer[..num_pending].copy_from_slice(self.inner.finish().as_ref()); + Tag(self.outer.finish(buffer, num_pending, cpu_features)) } } From adcae5a2e6be1699c6ca1e4359dc4765f92991b6 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Sat, 14 Dec 2024 22:22:58 -0800 Subject: [PATCH 2/5] digest internals: Simplify calling `BlockContext::finish`. Istead of forcing callers to slice the buffer to the block length and then asserting that they did so, just have `BlockContext::finish` do the slicing itself. (It already was.) --- src/digest.rs | 12 +++--------- src/hmac.rs | 3 +-- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/digest.rs b/src/digest.rs index 2e8b2ba0e7..15d3e9fc2d 100644 --- a/src/digest.rs +++ b/src/digest.rs @@ -76,12 +76,11 @@ impl BlockContext { // `block` may be arbitrarily overwritten. pub(crate) fn finish( mut self, - block: &mut [u8], + block: &mut [u8; MAX_BLOCK_LEN], num_pending: usize, cpu_features: cpu::Features, ) -> Digest { let block_len = self.algorithm.block_len(); - assert_eq!(block.len(), block_len); assert!(num_pending < block.len()); let block = &mut block[..block_len]; @@ -220,13 +219,8 @@ impl Context { /// has been called. pub fn finish(mut self) -> Digest { let cpu_features = cpu::features(); - - let block_len = self.block.algorithm.block_len(); - self.block.finish( - &mut self.pending[..block_len], - self.num_pending, - cpu_features, - ) + self.block + .finish(&mut self.pending, self.num_pending, cpu_features) } /// The algorithm that this context is using. diff --git a/src/hmac.rs b/src/hmac.rs index 0e8bc5229c..9306f7f097 100644 --- a/src/hmac.rs +++ b/src/hmac.rs @@ -315,8 +315,7 @@ impl Context { let cpu_features = cpu::features(); let algorithm = self.inner.algorithm(); - let mut buffer = [0u8; digest::MAX_BLOCK_LEN]; - let buffer = &mut buffer[..algorithm.block_len()]; + let buffer = &mut [0u8; digest::MAX_BLOCK_LEN]; let num_pending = algorithm.output_len(); buffer[..num_pending].copy_from_slice(self.inner.finish().as_ref()); Tag(self.outer.finish(buffer, num_pending, cpu_features)) From e481956f6c7f7ad16f2d8374542bdd8c6c98a7e4 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Mon, 13 May 2024 10:56:14 -0700 Subject: [PATCH 3/5] digest internals: Help compiler optimize `len_len` checks. Help the compiler understand the range of `len_len` so it can optimize away some bounds checks. --- src/digest.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/digest.rs b/src/digest.rs index 15d3e9fc2d..a844968b68 100644 --- a/src/digest.rs +++ b/src/digest.rs @@ -88,7 +88,7 @@ impl BlockContext { block[padding_pos] = 0x80; padding_pos += 1; - if padding_pos > block.len() - self.algorithm.len_len { + if padding_pos > block.len() - self.algorithm.block_len.len_len() { block[padding_pos..].fill(0); let (completed_bytes, leftover) = self.block_data_order(block, cpu_features); debug_assert_eq!((completed_bytes, leftover.len()), (block_len, 0)); @@ -288,9 +288,6 @@ pub struct Algorithm { chaining_len: usize, block_len: BlockLen, - /// The length of the length in the padding. - len_len: usize, - /// `block_data_order` processes all the full blocks of data in `data`. It /// returns the number of bytes processed and the unprocessed data, which /// is guaranteed to be less than `block_len` bytes long. @@ -356,7 +353,6 @@ pub static SHA1_FOR_LEGACY_USE_ONLY: Algorithm = Algorithm { output_len: sha1::OUTPUT_LEN, chaining_len: sha1::CHAINING_LEN, block_len: sha1::BLOCK_LEN, - len_len: 64 / 8, block_data_order: dynstate::sha1_block_data_order, format_output: dynstate::sha256_format_output, initial_state: DynState::new32([ @@ -379,7 +375,6 @@ pub static SHA256: Algorithm = Algorithm { output_len: OutputLen::_256, chaining_len: SHA256_OUTPUT_LEN, block_len: SHA256_BLOCK_LEN, - len_len: 64 / 8, block_data_order: dynstate::sha256_block_data_order, format_output: dynstate::sha256_format_output, initial_state: DynState::new32([ @@ -402,7 +397,6 @@ pub static SHA384: Algorithm = Algorithm { output_len: OutputLen::_384, chaining_len: SHA512_OUTPUT_LEN, block_len: SHA512_BLOCK_LEN, - len_len: SHA512_LEN_LEN, block_data_order: dynstate::sha512_block_data_order, format_output: dynstate::sha512_format_output, initial_state: DynState::new64([ @@ -425,7 +419,6 @@ pub static SHA512: Algorithm = Algorithm { output_len: OutputLen::_512, chaining_len: SHA512_OUTPUT_LEN, block_len: SHA512_BLOCK_LEN, - len_len: SHA512_LEN_LEN, block_data_order: dynstate::sha512_block_data_order, format_output: dynstate::sha512_format_output, initial_state: DynState::new64([ @@ -452,7 +445,6 @@ pub static SHA512_256: Algorithm = Algorithm { output_len: OutputLen::_256, chaining_len: SHA512_OUTPUT_LEN, block_len: SHA512_BLOCK_LEN, - len_len: SHA512_LEN_LEN, block_data_order: dynstate::sha512_block_data_order, format_output: dynstate::sha512_format_output, initial_state: DynState::new64([ @@ -515,9 +507,6 @@ pub const SHA512_OUTPUT_LEN: usize = OutputLen::_512.into(); /// The length of the output of SHA-512/256, in bytes. pub const SHA512_256_OUTPUT_LEN: usize = OutputLen::_256.into(); -/// The length of the length field for SHA-512-based algorithms, in bytes. -const SHA512_LEN_LEN: usize = 128 / 8; - #[derive(Clone, Copy)] enum BlockLen { _512 = 512 / 8, @@ -530,6 +519,21 @@ impl BlockLen { const fn into(self) -> usize { self as usize } + + #[inline(always)] + const fn len_len(self) -> usize { + let len_len = match self { + BlockLen::_512 => LenLen::_64, + BlockLen::_1024 => LenLen::_128, + }; + len_len as usize + } +} + +#[derive(Clone, Copy)] +enum LenLen { + _64 = 64 / 8, + _128 = 128 / 8, } #[derive(Clone, Copy)] From 39a5e7b3510572bf87cbf15c33ea7aec3fa29ef4 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Sat, 14 Dec 2024 22:44:56 -0800 Subject: [PATCH 4/5] digest internals: Clarify panic in `BlockContext::finish`. Clarify that the slicing will panic if (and only if) the function's precondition is violated. This is a step towards providing an non-panicking variant of the API. --- src/digest.rs | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/digest.rs b/src/digest.rs index a844968b68..4895a63d1d 100644 --- a/src/digest.rs +++ b/src/digest.rs @@ -81,31 +81,41 @@ impl BlockContext { cpu_features: cpu::Features, ) -> Digest { let block_len = self.algorithm.block_len(); - assert!(num_pending < block.len()); let block = &mut block[..block_len]; - let mut padding_pos = num_pending; - block[padding_pos] = 0x80; - padding_pos += 1; - - if padding_pos > block.len() - self.algorithm.block_len.len_len() { - block[padding_pos..].fill(0); - let (completed_bytes, leftover) = self.block_data_order(block, cpu_features); - debug_assert_eq!((completed_bytes, leftover.len()), (block_len, 0)); - // We don't increase |self.completed_bytes| because the padding - // isn't data, and so it isn't included in the data length. - padding_pos = 0; - } + let padding = match block.get_mut(num_pending..) { + Some([separator, padding @ ..]) => { + *separator = 0x80; + padding + } + // Precondition violated. + Some([]) | None => unreachable!(), + }; - block[padding_pos..(block_len - 8)].fill(0); + let padding = match padding + .len() + .checked_sub(self.algorithm.block_len.len_len()) + { + Some(_) => padding, + None => { + padding.fill(0); + let (completed_bytes, leftover) = self.block_data_order(block, cpu_features); + debug_assert_eq!((completed_bytes, leftover.len()), (block_len, 0)); + // We don't increase |self.completed_bytes| because the padding + // isn't data, and so it isn't included in the data length. + &mut block[..] + } + }; - // Output the length, in bits, in big endian order. let completed_bytes = self .completed_bytes .checked_add(polyfill::u64_from_usize(num_pending)) .unwrap(); let copmleted_bits = BitLength::from_byte_len(completed_bytes).unwrap(); - block[(block_len - 8)..].copy_from_slice(&copmleted_bits.to_be_bytes()); + + let (to_zero, len) = padding.split_at_mut(padding.len() - 8); + to_zero.fill(0); + len.copy_from_slice(&copmleted_bits.to_be_bytes()); let (completed_bytes, leftover) = self.block_data_order(block, cpu_features); debug_assert_eq!((completed_bytes, leftover.len()), (block_len, 0)); From b967bdf618d9e06d88bb9975f4b3cdcccef032b5 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Sat, 14 Dec 2024 22:55:20 -0800 Subject: [PATCH 5/5] digest internals: Provide an infallible I-U-F API internally. Return an error instead of panic when too much input is processed by a `BlockContext`, or when the precondition of `BlockContext::finish` is violated. Jump through some hoops to get the compiler to prefer the non-error path. This is a step towards providing an non-panicking variant of the API. --- src/digest.rs | 69 ++++++++++++++++++++++++++++++++++++++++----------- src/hmac.rs | 10 ++++++-- 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/src/digest.rs b/src/digest.rs index 4895a63d1d..92565d1dde 100644 --- a/src/digest.rs +++ b/src/digest.rs @@ -24,7 +24,7 @@ use self::{ }; use crate::{ bits::{BitLength, FromByteLen as _}, - cpu, debug, + cpu, debug, error, polyfill::{self, slice, sliceutil}, }; use core::num::Wrapping; @@ -74,12 +74,19 @@ impl BlockContext { // `num_pending < self.algorithm.block_len`. // // `block` may be arbitrarily overwritten. - pub(crate) fn finish( + pub(crate) fn try_finish( mut self, block: &mut [u8; MAX_BLOCK_LEN], num_pending: usize, cpu_features: cpu::Features, - ) -> Digest { + ) -> Result { + let completed_bytes = self + .completed_bytes + .checked_add(polyfill::u64_from_usize(num_pending)) + .ok_or_else(|| FinishError::too_much_input(self.completed_bytes))?; + let copmleted_bits = BitLength::from_byte_len(completed_bytes) + .map_err(|_: error::Unspecified| FinishError::too_much_input(self.completed_bytes))?; + let block_len = self.algorithm.block_len(); let block = &mut block[..block_len]; @@ -89,7 +96,11 @@ impl BlockContext { padding } // Precondition violated. - Some([]) | None => unreachable!(), + unreachable => { + return Err(FinishError::pending_not_a_partial_block( + unreachable.as_deref(), + )); + } }; let padding = match padding @@ -107,12 +118,6 @@ impl BlockContext { } }; - let completed_bytes = self - .completed_bytes - .checked_add(polyfill::u64_from_usize(num_pending)) - .unwrap(); - let copmleted_bits = BitLength::from_byte_len(completed_bytes).unwrap(); - let (to_zero, len) = padding.split_at_mut(padding.len() - 8); to_zero.fill(0); len.copy_from_slice(&copmleted_bits.to_be_bytes()); @@ -120,10 +125,10 @@ impl BlockContext { let (completed_bytes, leftover) = self.block_data_order(block, cpu_features); debug_assert_eq!((completed_bytes, leftover.len()), (block_len, 0)); - Digest { + Ok(Digest { algorithm: self.algorithm, value: (self.algorithm.format_output)(self.state), - } + }) } #[must_use] @@ -136,6 +141,37 @@ impl BlockContext { } } +pub(crate) enum FinishError { + #[allow(dead_code)] + TooMuchInput(u64), + #[allow(dead_code)] + PendingNotAPartialBlock(usize), +} + +impl FinishError { + #[cold] + #[inline(never)] + fn too_much_input(completed_bytes: u64) -> Self { + Self::TooMuchInput(completed_bytes) + } + + // unreachable + #[cold] + #[inline(never)] + fn pending_not_a_partial_block(padding: Option<&[u8]>) -> Self { + match padding { + None => Self::PendingNotAPartialBlock(0), + Some(padding) => Self::PendingNotAPartialBlock(padding.len()), + } + } +} + +impl From for error::Unspecified { + fn from(_: FinishError) -> Self { + Self + } +} + /// A context for multi-step (Init-Update-Finish) digest calculations. /// /// # Examples @@ -227,10 +263,15 @@ impl Context { /// /// `finish` consumes the context so it cannot be (mis-)used after `finish` /// has been called. - pub fn finish(mut self) -> Digest { + pub fn finish(self) -> Digest { + self.try_finish().unwrap() + } + + fn try_finish(mut self) -> Result { let cpu_features = cpu::features(); self.block - .finish(&mut self.pending, self.num_pending, cpu_features) + .try_finish(&mut self.pending, self.num_pending, cpu_features) + .map_err(error::Unspecified::from) } /// The algorithm that this context is using. diff --git a/src/hmac.rs b/src/hmac.rs index 9306f7f097..3bac7db9d1 100644 --- a/src/hmac.rs +++ b/src/hmac.rs @@ -312,13 +312,19 @@ impl Context { /// the return value of `sign` to a tag. Use `verify` for verification /// instead. pub fn sign(self) -> Tag { - let cpu_features = cpu::features(); + self.try_sign().unwrap() + } + fn try_sign(self) -> Result { + let cpu_features = cpu::features(); let algorithm = self.inner.algorithm(); let buffer = &mut [0u8; digest::MAX_BLOCK_LEN]; let num_pending = algorithm.output_len(); buffer[..num_pending].copy_from_slice(self.inner.finish().as_ref()); - Tag(self.outer.finish(buffer, num_pending, cpu_features)) + self.outer + .try_finish(buffer, num_pending, cpu_features) + .map(Tag) + .map_err(error::Unspecified::from) } }