From 9b7790828a4fb824c722f0802bca248cb3d8ff33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mello?= <3285133+asmello@users.noreply.github.com> Date: Wed, 16 Oct 2024 17:19:42 +0100 Subject: [PATCH] Refactor constructor safety (#76) * unsafe enew_unchecked in Pointer and PointerBuf * from_encoded_unchecked now unsafe --------- Co-authored-by: Chance --- CHANGELOG.md | 9 +++- src/pointer.rs | 138 ++++++++++++++++++++++++++++++++++++++----------- src/token.rs | 20 ++++--- 3 files changed, 130 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9b7b8b..3a12a7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -- Bump minimum Rust version to 1.79 +### Added + +- Adds unsafe associated methods `Pointer::new_unchecked` and `PointerBuf::new_unchecked` for + external zero-cost construction. + +### Changed + +- Bumps minimum Rust version to 1.79 ## [0.6.2] 2024-09-30 diff --git a/src/pointer.rs b/src/pointer.rs index e09cf5d..4f136bb 100644 --- a/src/pointer.rs +++ b/src/pointer.rs @@ -49,11 +49,21 @@ impl core::fmt::Display for Pointer { } } impl Pointer { - /// Private constructor for strings that are known to be correctly encoded + /// Create a `Pointer` from a string that is known to be correctly encoded. /// - /// This is a zero-copy constructor - fn new + ?Sized>(s: &S) -> &Self { - unsafe { &*(core::ptr::from_ref::(s.as_ref()) as *const Self) } + /// This is a cost-free conversion. + /// + /// ## Safety + /// The provided string must adhere to [RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901): + /// + /// - The pointer must start with `'/'` (%x2F) unless empty + /// - Tokens must be properly encoded: + /// - `'~'` (%x7E) must be escaped as `"~0"` + /// - `'/'` (%x2F) must be escaped as `"~1"` + /// + /// For potentially fallible parsing, see [`Pointer::parse`]. + pub unsafe fn new_unchecked + ?Sized>(s: &S) -> &Self { + &*(core::ptr::from_ref::(s.as_ref()) as *const Self) } /// Constant reference to a root pointer @@ -72,7 +82,8 @@ impl Pointer { /// ## Errors /// Returns a `ParseError` if the string is not a valid JSON Pointer. pub fn parse + ?Sized>(s: &S) -> Result<&Self, ParseError> { - validate(s.as_ref()).map(Self::new) + // SAFETY: we validate first + validate(s.as_ref()).map(|s| unsafe { Self::new_unchecked(s) }) } /// Creates a static `Pointer` from a string. @@ -135,7 +146,8 @@ impl Pointer { pub fn back(&self) -> Option { self.0 .rsplit_once('/') - .map(|(_, back)| Token::from_encoded_unchecked(back)) + // SAFETY: pointer is encoded + .map(|(_, back)| unsafe { Token::from_encoded_unchecked(back) }) } /// Returns the last token in the `Pointer`. @@ -152,9 +164,10 @@ impl Pointer { } self.0[1..] .split_once('/') + // SAFETY: source pointer is encoded .map_or_else( - || Token::from_encoded_unchecked(&self.0[1..]), - |(front, _)| Token::from_encoded_unchecked(front), + || unsafe { Token::from_encoded_unchecked(&self.0[1..]) }, + |(front, _)| unsafe { Token::from_encoded_unchecked(front) }, ) .into() } @@ -174,10 +187,22 @@ impl Pointer { self.0[1..] .find('/') .map_or_else( - || (Token::from_encoded_unchecked(&self.0[1..]), Self::root()), + || { + ( + // SAFETY: source pointer is encoded + unsafe { Token::from_encoded_unchecked(&self.0[1..]) }, + Self::root(), + ) + }, |idx| { let (front, back) = self.0[1..].split_at(idx); - (Token::from_encoded_unchecked(front), Self::new(back)) + ( + // SAFETY: source pointer is encoded + unsafe { Token::from_encoded_unchecked(front) }, + // SAFETY: we split at a token boundary, so back is + // valid pointer. + unsafe { Self::new_unchecked(back) }, + ) }, ) .into() @@ -209,30 +234,48 @@ impl Pointer { return None; } let (head, tail) = self.0.split_at(offset); - Some((Self::new(head), Self::new(tail))) + // SAFETY: we split at a token boundary, so head and tail are valid pointers + unsafe { Some((Self::new_unchecked(head), Self::new_unchecked(tail))) } } /// Splits the `Pointer` into the parent path and the last `Token`. pub fn split_back(&self) -> Option<(&Self, Token)> { - self.0 - .rsplit_once('/') - .map(|(front, back)| (Self::new(front), Token::from_encoded_unchecked(back))) + self.0.rsplit_once('/').map(|(front, back)| { + ( + // SAFETY: we split at a token boundary, so front is a valid pointer + unsafe { Self::new_unchecked(front) }, + // SAFETY: source token is encoded + unsafe { Token::from_encoded_unchecked(back) }, + ) + }) } /// A pointer to the parent of the current path. pub fn parent(&self) -> Option<&Self> { - self.0.rsplit_once('/').map(|(front, _)| Self::new(front)) + // SAFETY: we split at a token boundary, so front is a valid pointer + self.0 + .rsplit_once('/') + .map(|(front, _)| unsafe { Self::new_unchecked(front) }) } /// Returns the pointer stripped of the given suffix. pub fn strip_suffix<'a>(&'a self, suffix: &Self) -> Option<&'a Self> { - self.0.strip_suffix(&suffix.0).map(Self::new) + self.0 + .strip_suffix(&suffix.0) + // SAFETY: the suffix is a valid pointer, so removing it from the + // back of another pointer will preserve token boundaries + .map(|s| unsafe { Self::new_unchecked(s) }) } /// Returns the pointer stripped of the given prefix. pub fn strip_prefix<'a>(&'a self, prefix: &Self) -> Option<&'a Self> { - self.0.strip_prefix(&prefix.0).map(Self::new) + self.0 + .strip_prefix(&prefix.0) + // SAFETY: the suffix is a valid pointer, so removing it from the + // front of another pointer will preserve token boundaries + .map(|s| unsafe { Self::new_unchecked(s) }) } + /// Attempts to get a `Token` by the index. Returns `None` if the index is /// out of bounds. /// @@ -491,7 +534,7 @@ impl Pointer { /// /// ## Examples /// ``` - /// let mut ptr = jsonptr::Pointer::from_static("/foo/bar").to_buf(); + /// let mut ptr = jsonptr::PointerBuf::parse("/foo/bar").unwrap(); /// assert_eq!(ptr.len(), 8); /// /// ptr.push_back("~"); @@ -801,11 +844,26 @@ impl<'a> IntoIterator for &'a Pointer { pub struct PointerBuf(String); impl PointerBuf { + /// Creates a new `PointerBuf` pointing to a document root. + /// + /// This is an alias to [`Self::new`]. + pub fn root() -> Self { + Self::new() + } + /// Creates a new `PointerBuf` pointing to a document root. pub fn new() -> Self { Self(String::new()) } + /// Create a `PointerBuf` from a string that is known to be correctly encoded. + /// + /// ## Safety + /// The provided string must adhere to RFC 6901. + pub unsafe fn new_unchecked(s: impl Into) -> Self { + Self(s.into()) + } + /// Attempts to parse a string into a `PointerBuf`. /// /// ## Errors @@ -844,7 +902,8 @@ impl PointerBuf { /// Removes and returns the last `Token` in the `Pointer` if it exists. pub fn pop_back(&mut self) -> Option> { if let Some(idx) = self.0.rfind('/') { - let back = Token::from_encoded_unchecked(self.0.split_off(idx + 1)); + // SAFETY: source pointer is encoded + let back = unsafe { Token::from_encoded_unchecked(self.0.split_off(idx + 1)) }; self.0.pop(); // remove trailing `/` Some(back) } else { @@ -862,8 +921,10 @@ impl PointerBuf { } else { core::mem::take(&mut self.0) }; - token.remove(0); // remove leading `/` - Token::from_encoded_unchecked(token) + // remove leading `/` + token.remove(0); + // SAFETY: source pointer is encoded + unsafe { Token::from_encoded_unchecked(token) } }) } @@ -936,7 +997,8 @@ impl Borrow for PointerBuf { impl Deref for PointerBuf { type Target = Pointer; fn deref(&self) -> &Self::Target { - Pointer::new(&self.0) + // SAFETY: we hold a valid pointer + unsafe { Pointer::new_unchecked(self.0.as_str()) } } } @@ -1244,17 +1306,35 @@ mod tests { let _ = Pointer::from_static("foo/bar"); } + #[test] + fn root_is_alias_of_new_pathbuf() { + assert_eq!(PointerBuf::root(), PointerBuf::new()); + } + + #[test] + fn from_unchecked_pathbuf() { + let s = "/foo/bar/0"; + assert_eq!( + unsafe { PointerBuf::new_unchecked(String::from(s)) }, + PointerBuf::parse(s).unwrap() + ); + } + #[test] fn strip_suffix() { - let p = Pointer::new("/example/pointer/to/some/value"); - let stripped = p.strip_suffix(Pointer::new("/to/some/value")).unwrap(); + let p = Pointer::from_static("/example/pointer/to/some/value"); + let stripped = p + .strip_suffix(Pointer::from_static("/to/some/value")) + .unwrap(); assert_eq!(stripped, "/example/pointer"); } #[test] fn strip_prefix() { - let p = Pointer::new("/example/pointer/to/some/value"); - let stripped = p.strip_prefix(Pointer::new("/example/pointer")).unwrap(); + let p = Pointer::from_static("/example/pointer/to/some/value"); + let stripped = p + .strip_prefix(Pointer::from_static("/example/pointer")) + .unwrap(); assert_eq!(stripped, "/to/some/value"); } @@ -1434,13 +1514,13 @@ mod tests { assert_eq!(ptr.tokens().count(), 3); let mut token = ptr.pop_front(); - assert_eq!(token, Some(Token::from_encoded_unchecked("bar"))); + assert_eq!(token, Some(Token::new("bar"))); assert_eq!(ptr.tokens().count(), 2); token = ptr.pop_front(); - assert_eq!(token, Some(Token::from_encoded_unchecked(""))); + assert_eq!(token, Some(Token::new(""))); assert_eq!(ptr.tokens().count(), 1); token = ptr.pop_front(); - assert_eq!(token, Some(Token::from_encoded_unchecked(""))); + assert_eq!(token, Some(Token::new(""))); assert_eq!(ptr.tokens().count(), 0); assert_eq!(ptr, Pointer::root()); } diff --git a/src/token.rs b/src/token.rs index 2b3ea65..1b70955 100644 --- a/src/token.rs +++ b/src/token.rs @@ -42,7 +42,10 @@ impl<'a> Token<'a> { /// /// This is like [`Self::from_encoded`], except that no validation is /// performed on the input string. - pub(crate) fn from_encoded_unchecked(inner: impl Into>) -> Self { + /// + /// ## Safety + /// Input string must be RFC 6901 encoded. + pub(crate) unsafe fn from_encoded_unchecked(inner: impl Into>) -> Self { Self { inner: inner.into(), } @@ -255,7 +258,8 @@ macro_rules! impl_from_num { $( impl From<$ty> for Token<'static> { fn from(v: $ty) -> Self { - Token::from_encoded_unchecked(v.to_string()) + // SAFETY: only used for integer types, which are always valid + unsafe { Token::from_encoded_unchecked(v.to_string()) } } } )* @@ -312,7 +316,10 @@ pub struct Tokens<'a> { impl<'a> Iterator for Tokens<'a> { type Item = Token<'a>; fn next(&mut self) -> Option { - self.inner.next().map(Token::from_encoded_unchecked) + self.inner + .next() + // SAFETY: source pointer is encoded + .map(|s| unsafe { Token::from_encoded_unchecked(s) }) } } impl<'t> Tokens<'t> { @@ -476,13 +483,12 @@ mod tests { fn tokens() { let pointer = Pointer::from_static("/a/b/c"); let tokens: Vec = pointer.tokens().collect(); - assert_eq!( - tokens, + assert_eq!(tokens, unsafe { vec![ Token::from_encoded_unchecked("a"), Token::from_encoded_unchecked("b"), - Token::from_encoded_unchecked("c") + Token::from_encoded_unchecked("c"), ] - ); + }); } }