From 9cec5e7b783c6d5ab4b62748f051258c8aadaa8b Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Mon, 16 Sep 2024 13:51:04 -0700 Subject: [PATCH] Tighten JsonPointer and methods The `JsonPointer` type ought to be `#[repr(transparent)]` if `std::mem::transmut` is going to be used to coerce one from a `str`. `ReferenceToken` already did. The `validate` method did not ensure that a string starts with a `/`. This may not have led a direct path to any undefined behavior, because it was ensured that all `~` are followed by a `0` or `1`, but other methods do act as though they assumed a pointer string always starts with a `/`. I think this makes things a bit more clear. The private `token_end` can be mostly replaced by `find("/")`. `as_array_index` can be implemented in terms of the standard library's integer parsing. (Allowing "+000" is an annoying gotcha in seemingly every major language's standard library!). --- .../sd-primitives/src/json_pointer.rs | 89 +++++++------------ 1 file changed, 31 insertions(+), 58 deletions(-) diff --git a/crates/claims/crates/data-integrity/sd-primitives/src/json_pointer.rs b/crates/claims/crates/data-integrity/sd-primitives/src/json_pointer.rs index c957d1416..e668cd0a1 100644 --- a/crates/claims/crates/data-integrity/sd-primitives/src/json_pointer.rs +++ b/crates/claims/crates/data-integrity/sd-primitives/src/json_pointer.rs @@ -1,5 +1,5 @@ -use core::fmt; -use std::{borrow::Cow, ops::Deref, str::FromStr}; +use core::{fmt, ops::Deref, str::FromStr}; +use std::borrow::Cow; use serde::{Deserialize, Serialize}; @@ -11,6 +11,7 @@ pub struct InvalidJsonPointer(pub T); /// /// See: #[derive(Debug, Serialize)] +#[repr(transparent)] pub struct JsonPointer(str); impl JsonPointer { @@ -32,15 +33,16 @@ impl JsonPointer { std::mem::transmute(s) } - pub fn validate(str: &str) -> bool { - let mut chars = str.chars(); - while let Some(c) = chars.next() { - if c == '~' && !matches!(chars.next(), Some('0' | '1')) { - return false; - } - } - - true + /// Confirms the validity of a string such that it may be safely used for + /// [`Self::new_unchecked`]. + pub fn validate(s: &str) -> bool { + s.is_empty() + || s.starts_with("/") + && core::iter::from_fn({ + let mut chars = s.chars(); + move || Some(chars.next()? != '~' || matches!(chars.next(), Some('0' | '1'))) + }) + .all(core::convert::identity) } pub fn as_str(&self) -> &str { @@ -51,31 +53,14 @@ impl JsonPointer { self.0.is_empty() } - fn token_end(&self) -> Option { - if self.is_empty() { - None - } else { - let mut i = 1; - - let bytes = self.0.as_bytes(); - while i < bytes.len() { - if bytes[i] == b'/' { - break; - } - - i += 1 - } - - Some(i) - } - } - pub fn split_first(&self) -> Option<(&ReferenceToken, &Self)> { - self.token_end().map(|i| unsafe { - ( - ReferenceToken::new_unchecked(&self.0[1..i]), - Self::new_unchecked(&self.0[i..]), - ) + self.0.strip_prefix("/").map(|s| { + let (left, right) = s.find("/").map(|idx| s.split_at(idx)).unwrap_or((s, "")); + // Safety: the token is guaranteed not to include a '/', and remaining shall be either + // empty or a valid pointer starting with '/'. + let token = unsafe { ReferenceToken::new_unchecked(left) }; + let remaining = unsafe { Self::new_unchecked(right) }; + (token, remaining) }) } @@ -211,39 +196,27 @@ impl ReferenceToken { } pub fn decode(&self) -> String { - let mut result = String::new(); + let mut buf = String::with_capacity(self.0.len()); let mut chars = self.0.chars(); - while let Some(c) = chars.next() { - let decoded_c = match c { + buf.extend(core::iter::from_fn(|| { + Some(match chars.next()? { '~' => match chars.next() { Some('0') => '~', Some('1') => '/', _ => unreachable!(), }, c => c, - }; - - result.push(decoded_c); - } - - result + }) + })); + buf } pub fn as_array_index(&self) -> Option { - let mut chars = self.0.chars(); - let mut i = chars.next()?.to_digit(10)? as usize; - if i == 0 { - match chars.next() { - Some(_) => None, - None => Some(0), - } - } else { - for c in chars { - let d = c.to_digit(10)? as usize; - i = i * 10 + d; - } - - Some(i) + // Like usize::from_str, but don't allow leading '+' or '0'. + match self.0.as_bytes() { + [c @ b'0'..=b'9'] => Some((c - b'0') as usize), + [b'1'..=b'9', ..] => self.0.parse().ok(), + _ => None, } } }