From 555d598754f92f296bd75669764cedb37d36b75a Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Tue, 31 Dec 2024 20:31:21 -0800 Subject: [PATCH] chacha20-poly1305 internals: Start using `Overlapping` in `open()`. --- src/aead/algorithm.rs | 4 ++- src/aead/chacha.rs | 47 +++++++++++++++-------------------- src/aead/chacha/fallback.rs | 15 +++-------- src/aead/chacha20_poly1305.rs | 26 ++++++++----------- src/aead/overlapping/base.rs | 25 ++++++++++++++++++- 5 files changed, 62 insertions(+), 55 deletions(-) diff --git a/src/aead/algorithm.rs b/src/aead/algorithm.rs index 1556cf5dde..ee8e4da1c4 100644 --- a/src/aead/algorithm.rs +++ b/src/aead/algorithm.rs @@ -18,6 +18,7 @@ use core::ops::RangeFrom; use super::{ aes, aes_gcm, chacha20_poly1305, nonce::{Nonce, NONCE_LEN}, + overlapping::{Overlapping, SrcIndexError}, Aad, KeyInner, Tag, TAG_LEN, }; @@ -259,5 +260,6 @@ fn chacha20_poly1305_open( KeyInner::ChaCha20Poly1305(key) => key, _ => unreachable!(), }; - chacha20_poly1305::open(key, nonce, aad, in_out, src, cpu_features) + let in_out = Overlapping::new(in_out, src).map_err(error::erase::<SrcIndexError>)?; + chacha20_poly1305::open(key, nonce, aad, in_out, cpu_features) } diff --git a/src/aead/chacha.rs b/src/aead/chacha.rs index 53ee1811db..96ad6f9906 100644 --- a/src/aead/chacha.rs +++ b/src/aead/chacha.rs @@ -13,7 +13,7 @@ // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
-use super::{quic::Sample, Nonce}; +use super::{overlapping, quic::Sample, Nonce}; #[cfg(any( test, @@ -27,7 +27,8 @@ use super::{quic::Sample, Nonce}; mod fallback; use crate::polyfill::ArraySplitMap; -use core::ops::RangeFrom; + +pub type Overlapping<'o> = overlapping::Overlapping<'o, u8>; #[derive(Clone)] pub struct Key { @@ -45,7 +46,7 @@ impl Key { impl Key { #[inline] pub fn encrypt_in_place(&self, counter: Counter, in_out: &mut [u8]) { - self.encrypt_within(counter, in_out, 0..); + self.encrypt_within(counter, Overlapping::in_place(in_out)) } #[inline] @@ -67,9 +68,8 @@ impl Key { out } - /// Analogous to `slice::copy_within()`. #[inline(always)] - pub fn encrypt_within(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) { + pub fn encrypt_within(&self, counter: Counter, in_out: Overlapping<'_>) { #[cfg(any( target_arch = "aarch64", target_arch = "arm", @@ -77,27 +77,17 @@ impl Key { target_arch = "x86_64" ))] #[inline(always)] - pub(super) fn ChaCha20_ctr32( - key: &Key, - counter: Counter, - in_out: &mut [u8], - src: RangeFrom<usize>, - ) { - let in_out_len = in_out.len().checked_sub(src.start).unwrap(); - + pub(super) fn ChaCha20_ctr32(key: &Key, counter: Counter, in_out: Overlapping<'_>) { // XXX: The x86 and at least one branch of the ARM assembly language // code doesn't allow overlapping input and output unless they are // exactly overlapping. TODO: Figure out which branch of the ARM code // has this limitation and come up with a better solution. 
// // https://rt.openssl.org/Ticket/Display.html?id=4362 - let (output, input) = - if cfg!(any(target_arch = "aarch64", target_arch = "x86_64")) || src.start == 0 { - (in_out.as_mut_ptr(), in_out[src].as_ptr()) - } else { - in_out.copy_within(src, 0); - (in_out.as_mut_ptr(), in_out.as_ptr()) - }; + #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] + let in_out = Overlapping::in_place(in_out.copy_within()); + + let (input, output, len) = in_out.into_input_output_len(); // There's no need to worry if `counter` is incremented because it is // owned here and we drop immediately after the call. @@ -110,7 +100,7 @@ impl Key { counter: &Counter, ); } - unsafe { ChaCha20_ctr32(output, input, in_out_len, key.words_less_safe(), &counter) } + unsafe { ChaCha20_ctr32(output, input, len, key.words_less_safe(), &counter) } } #[cfg(not(any( @@ -121,7 +111,7 @@ impl Key { )))] use fallback::ChaCha20_ctr32; - ChaCha20_ctr32(self, counter, in_out, src); + ChaCha20_ctr32(self, counter, in_out) } #[inline] @@ -189,8 +179,8 @@ const BLOCK_LEN: usize = 64; mod tests { extern crate alloc; - use super::*; - use crate::test; + use super::{super::overlapping::SrcIndexError, *}; + use crate::{error, test}; use alloc::vec; const MAX_ALIGNMENT_AND_OFFSET: (usize, usize) = (15, 259); @@ -232,7 +222,7 @@ mod tests { // works around that. fn chacha20_test( max_alignment_and_offset: (usize, usize), - f: impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>), + f: impl for<'k, 'o> Fn(&'k Key, Counter, Overlapping<'o>), ) { // Reuse a buffer to avoid slowing down the tests with allocations. 
let mut buf = vec![0u8; 1300]; @@ -278,7 +268,7 @@ mod tests { expected: &[u8], buf: &mut [u8], (max_alignment, max_offset): (usize, usize), - f: &impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>), + f: &impl for<'k, 'o> Fn(&'k Key, Counter, Overlapping<'o>), ) { const ARBITRARY: u8 = 123; @@ -295,7 +285,10 @@ mod tests { Nonce::try_assume_unique_for_key(nonce).unwrap(), ctr, ); - f(key, ctr, buf, src); + let in_out = Overlapping::new(buf, src) + .map_err(error::erase::<SrcIndexError>) + .unwrap(); + f(key, ctr, in_out); assert_eq!(&buf[..input.len()], expected) } } diff --git a/src/aead/chacha/fallback.rs b/src/aead/chacha/fallback.rs index 857edcf16e..bfff7e8518 100644 --- a/src/aead/chacha/fallback.rs +++ b/src/aead/chacha/fallback.rs @@ -15,15 +15,10 @@ // Adapted from the public domain, estream code by D. Bernstein. // Adapted from the BoringSSL crypto/chacha/chacha.c. -use super::{Counter, Key, BLOCK_LEN}; -use core::{mem::size_of, ops::RangeFrom}; +use super::{Counter, Key, Overlapping, BLOCK_LEN}; +use core::mem::size_of; -pub(super) fn ChaCha20_ctr32( - key: &Key, - counter: Counter, - in_out: &mut [u8], - src: RangeFrom<usize>, -) { +pub(super) fn ChaCha20_ctr32(key: &Key, counter: Counter, in_out: Overlapping<'_>) { const SIGMA: [u32; 4] = [ u32::from_le_bytes(*b"expa"), u32::from_le_bytes(*b"nd 3"), @@ -39,9 +34,7 @@ pub(super) fn ChaCha20_ctr32( key[6], key[7], counter[0], counter[1], counter[2], counter[3], ]; - let mut in_out_len = in_out.len().checked_sub(src.start).unwrap(); - let mut input = in_out[src].as_ptr(); - let mut output = in_out.as_mut_ptr(); + let (mut input, mut output, mut in_out_len) = in_out.into_input_output_len(); let mut buf = [0u8; BLOCK_LEN]; while in_out_len > 0 { diff --git a/src/aead/chacha20_poly1305.rs b/src/aead/chacha20_poly1305.rs index e0f5e9e2c7..2e84612a03 100644 --- a/src/aead/chacha20_poly1305.rs +++ b/src/aead/chacha20_poly1305.rs @@ -13,14 +13,13 @@ // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
use super::{ - chacha::{self, Counter, Iv}, + chacha::{self, Counter, Iv, Overlapping}, poly1305, Aad, Nonce, Tag, }; use crate::{ cpu, error, polyfill::{u64_from_usize, usize_from_u64_saturated}, }; -use core::ops::RangeFrom; pub(super) const KEY_LEN: usize = chacha::KEY_LEN; @@ -126,17 +125,12 @@ pub(super) fn open( key: &Key, nonce: Nonce, aad: Aad<&[u8]>, - in_out: &mut [u8], - src: RangeFrom<usize>, + in_out: Overlapping<'_>, cpu_features: cpu::Features, ) -> Result<Tag, error::Unspecified> { let Key(chacha20_key) = key; - let unprefixed_len = in_out - .len() - .checked_sub(src.start) - .ok_or(error::Unspecified)?; - if unprefixed_len > MAX_IN_OUT_LEN { + if in_out.len() > MAX_IN_OUT_LEN { return Err(error::Unspecified); } // RFC 8439 Section 2.8 says the maximum AAD length is 2**64 - 1, which is @@ -180,11 +174,12 @@ pub(super) fn open( ); } + let (input, output, len) = in_out.into_input_output_len(); let out = unsafe { chacha20_poly1305_open( - in_out.as_mut_ptr(), - in_out.as_ptr().add(src.start), - unprefixed_len, + output, + input, + len, aad.as_ref().as_ptr(), aad.as_ref().len(), &mut data, @@ -202,9 +197,10 @@ pub(super) fn open( }; poly1305_update_padded_16(&mut auth, aad.as_ref()); - poly1305_update_padded_16(&mut auth, &in_out[src.clone()]); - chacha20_key.encrypt_within(counter, in_out, src.clone()); - Ok(finish(auth, aad.as_ref().len(), unprefixed_len)) + poly1305_update_padded_16(&mut auth, in_out.input()); + let in_out_len = in_out.len(); + chacha20_key.encrypt_within(counter, in_out); + Ok(finish(auth, aad.as_ref().len(), in_out_len)) } #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] diff --git a/src/aead/overlapping/base.rs b/src/aead/overlapping/base.rs index cd5a3f5dde..f4e217f410 100644 --- a/src/aead/overlapping/base.rs +++ b/src/aead/overlapping/base.rs @@ -15,6 +15,7 @@ use core::ops::RangeFrom; pub struct Overlapping<'o, T> { + // Invariant: self.src.start <= in_out.len(). 
in_out: &'o mut [T], src: RangeFrom<usize>, } @@ -31,6 +32,20 @@ impl<'o, T> Overlapping<'o, T> { } } + #[cfg(any(target_arch = "arm", target_arch = "x86"))] + pub fn copy_within(self) -> &'o mut [T] + where + T: Copy, + { + if self.src.start == 0 { + self.in_out + } else { + let len = self.len(); + self.in_out.copy_within(self.src, 0); + &mut self.in_out[..len] + } + } + #[cfg(any(target_arch = "arm", target_arch = "x86"))] pub fn into_slice_src_mut(self) -> (&'o mut [T], RangeFrom<usize>) { (self.in_out, self.src) @@ -39,8 +54,16 @@ impl<'o, T> Overlapping<'o, T> { impl<T> Overlapping<'_, T> { pub fn len(&self) -> usize { - self.in_out[self.src.clone()].len() + self.input().len() } + + pub fn input(&self) -> &[T] { + self.in_out.get(self.src.clone()).unwrap_or_else(|| { + // Ensured by invariant. + unreachable!() + }) + } + pub fn into_input_output_len(self) -> (*const T, *mut T, usize) { let len = self.len(); let output = self.in_out.as_mut_ptr();