Skip to content

Commit

Permalink
mark unchecked functions as unsafe and improve documentation for NoAl…
Browse files Browse the repository at this point in the history
…locString
  • Loading branch information
ekump committed Sep 4, 2024
1 parent b8fb0eb commit f48e785
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 36 deletions.
16 changes: 6 additions & 10 deletions trace-utils/src/msgpack_decoder/v04/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ use std::{collections::HashMap, f64};
/// assert_eq!(1, decoded_traces.len());
/// assert_eq!(1, decoded_traces[0].len());
/// let decoded_span = &decoded_traces[0][0];
/// assert_eq!("test-span", decoded_span.name.as_str());
/// assert_eq!("test-span", decoded_span.name.as_str().unwrap());
/// ```
pub fn from_slice(data: tinybytes::Bytes) -> Result<Vec<Vec<Span>>, DecodeError> {
let mut local_buf = data.as_ref();
Expand Down Expand Up @@ -122,16 +122,14 @@ fn read_str_map_to_no_alloc_strings(
let (val, next) = read_string_ref(buf)?;
*buf = next;

let key = buf_wrapper.create_no_alloc_string_unchecked(key.as_bytes());
let value = buf_wrapper.create_no_alloc_string_unchecked(val.as_bytes());
let key = unsafe { buf_wrapper.create_no_alloc_string_unchecked(key.as_bytes()) };
let value = unsafe { buf_wrapper.create_no_alloc_string_unchecked(val.as_bytes()) };
map.insert(key, value);
}
Ok(map)
}

#[inline]
// Safety: read_string_ref checks utf8 validity, so we don't do it again when creating the
// NoAllocStrings.
fn read_metric_pair(
buffer_wrapper: &BufferWrapper,
buf: &mut &[u8],
Expand All @@ -140,7 +138,7 @@ fn read_metric_pair(
*buf = next;

let v = read_number(buf)?.try_into()?;
let key = buffer_wrapper.create_no_alloc_string_unchecked(key.as_bytes());
let key = unsafe { buffer_wrapper.create_no_alloc_string_unchecked(key.as_bytes()) };

Ok((key, v))
}
Expand All @@ -152,8 +150,6 @@ fn read_metrics(
read_map(len, buf_wrapper, buf, read_metric_pair)
}

// Safety: read_string_ref checks utf8 validity, so we don't do it again when creating the
// NoAllocStrings.
fn read_meta_struct(
buf_wrapper: &BufferWrapper,
buf: &mut &[u8],
Expand All @@ -174,7 +170,7 @@ fn read_meta_struct(
let value = read_number(buf)?.try_into()?;
v.push(value);
}
let key = buf_wrapper.create_no_alloc_string_unchecked(key.as_bytes());
let key = unsafe { buf_wrapper.create_no_alloc_string_unchecked(key.as_bytes()) };
Ok((key, v))
}

Expand Down Expand Up @@ -271,7 +267,7 @@ mod tests {
assert_eq!(1, decoded_traces.len());
assert_eq!(1, decoded_traces[0].len());
let decoded_span = &decoded_traces[0][0];
assert_eq!(expected_string, decoded_span.name.as_str());
assert_eq!(expected_string, decoded_span.name.as_str().unwrap());
}

#[test]
Expand Down
10 changes: 6 additions & 4 deletions trace-utils/src/msgpack_decoder/v04/decoder/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,19 @@ fn fill_span(
match key {
SpanKey::Service => {
let (value, next) = read_string_ref(buf)?;
span.service = buf_wrapper.create_no_alloc_string_unchecked(value.as_bytes());
span.service =
unsafe { buf_wrapper.create_no_alloc_string_unchecked(value.as_bytes()) };
*buf = next;
}
SpanKey::Name => {
let (value, next) = read_string_ref(buf)?;
span.name = buf_wrapper.create_no_alloc_string_unchecked(value.as_bytes());
span.name = unsafe { buf_wrapper.create_no_alloc_string_unchecked(value.as_bytes()) };
*buf = next;
}
SpanKey::Resource => {
let (value, next) = read_string_ref(buf)?;
span.resource = buf_wrapper.create_no_alloc_string_unchecked(value.as_bytes());
span.resource =
unsafe { buf_wrapper.create_no_alloc_string_unchecked(value.as_bytes()) };
*buf = next;
}
SpanKey::TraceId => span.trace_id = read_number(buf)?.try_into()?,
Expand All @@ -82,7 +84,7 @@ fn fill_span(
SpanKey::Error => span.error = read_number(buf)?.try_into()?,
SpanKey::Type => {
let (value, next) = read_string_ref(buf)?;
span.r#type = buf_wrapper.create_no_alloc_string_unchecked(value.as_bytes());
span.r#type = unsafe { buf_wrapper.create_no_alloc_string_unchecked(value.as_bytes()) };
*buf = next;
}
SpanKey::Meta => span.meta = read_str_map_to_no_alloc_strings(buf_wrapper, buf)?,
Expand Down
8 changes: 3 additions & 5 deletions trace-utils/src/msgpack_decoder/v04/decoder/span_link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@ pub(crate) fn read_span_links(
DecodeError::InvalidFormat("Unable to read marker for span links".to_owned())
})? {
Marker::FixArray(len) => {
let mut vec: Vec<SpanLink> = Vec::with_capacity(
len.try_into()
.expect("Unable to cast FixArray len to usize"),
);
let mut vec: Vec<SpanLink> = Vec::with_capacity(len.into());
for _ in 0..len {
vec.push(decode_span_link(buf_wrapper, buf)?);
}
Expand Down Expand Up @@ -97,7 +94,8 @@ fn decode_span_link(buf_wrapper: &BufferWrapper, buf: &mut &[u8]) -> Result<Span
}
SpanLinkKey::Tracestate => {
let (val, next) = read_string_ref(buf)?;
span.tracestate = buf_wrapper.create_no_alloc_string_unchecked(val.as_bytes());
span.tracestate =
unsafe { buf_wrapper.create_no_alloc_string_unchecked(val.as_bytes()) };
*buf = next;
}
SpanLinkKey::Flags => span.flags = read_number(buf)?.try_into()?,
Expand Down
139 changes: 122 additions & 17 deletions trace-utils/src/no_alloc_string.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,65 @@
use serde::ser::{Serialize, Serializer};
use std::borrow::Borrow;
use std::str::Utf8Error;
use tinybytes;

pub struct BufferWrapper {
buffer: tinybytes::Bytes,
}

impl BufferWrapper {
/// Creates a new `BufferWrapper` from a `tinybytes::Bytes` instance.
///
/// # Arguments
///
/// * `buffer` - A `tinybytes::Bytes` instance to be wrapped.
///
/// # Returns
///
/// A new `BufferWrapper` instance containing the provided buffer.
pub fn new(buffer: tinybytes::Bytes) -> Self {
BufferWrapper { buffer }
}

/// Creates a NoAllocString from a slice of tinybytes::Bytes.
/// Creates a `NoAllocString` from a slice of bytes within the wrapped buffer.
///
/// This function validates that the provided slice is valid UTF-8. If the slice is not valid
/// UTF-8, an error is returned.
///
/// # Arguments
///
/// * `slice` - A byte slice that will be converted into a `NoAllocString`.
///
/// # Returns
///
/// A `Result` containing the `NoAllocString` if the slice is valid UTF-8, or a `Utf8Error` if
/// the slice is not valid UTF-8.
///
/// # Errors
///
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
pub fn create_no_alloc_string(
&self,
slice: &[u8],
) -> Result<NoAllocString, std::str::Utf8Error> {
NoAllocString::from_bytes(self.buffer.slice_ref(slice).expect("Invalid slice"))
}

/// Creates a NoAllocString from a slice of tinybytes::Bytes without validating the bytes are
/// UTF-8.
pub fn create_no_alloc_string_unchecked(&self, slice: &[u8]) -> NoAllocString {
/// Creates a `NoAllocString` from a slice of bytes within the wrapped buffer without validating
/// the bytes.
///
/// This function does not perform any validation on the provided bytes, and assumes that the
/// bytes are valid UTF-8. If the bytes are not valid UTF-8, the behavior is undefined.
///
/// # Arguments
///
/// * `slice` - A byte slice that will be converted into a `NoAllocString`.
///
/// # Safety
///
/// This function is unsafe because it assumes the bytes are valid UTF-8. If the bytes are not
/// valid UTF-8, the behavior is undefined.
pub unsafe fn create_no_alloc_string_unchecked(&self, slice: &[u8]) -> NoAllocString {
NoAllocString::from_bytes_unchecked(self.buffer.slice_ref(slice).expect("Invalid slice"))
}
}
Expand All @@ -36,32 +74,97 @@ impl Serialize for NoAllocString {
where
S: Serializer,
{
serializer.serialize_str(self.as_str())
// This should be safe because we have already validated that the bytes are valid UTF-8 when
// creating the NoAllocString.
unsafe { serializer.serialize_str(self.as_str_unchecked()) }
}
}

impl NoAllocString {
// Creates a NoAllocString from a full slice (copies the data, so technically not no-alloc)
/// Creates a `NoAllocString` from a slice of bytes.
///
/// This function validates that the provided slice is valid UTF-8. If the slice is not valid
/// UTF-8, an error is returned.
///
/// # Arguments
///
/// * `slice` - A byte slice that will be converted into a `NoAllocString`.
///
/// # Returns
///
/// A `Result` containing the `NoAllocString` if the slice is valid UTF-8, or a `Utf8Error` if
/// the slice is not valid UTF-8.
///
/// # Errors
///
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
pub fn from_slice(slice: &[u8]) -> Result<NoAllocString, std::str::Utf8Error> {
std::str::from_utf8(slice)?;
Ok(NoAllocString {
bytes: tinybytes::Bytes::copy_from_slice(slice),
})
}
// Creates a NoAllocString from a Bytes instance (does not copy the data)

/// Creates a `NoAllocString` from a `tinybytes::Bytes` instance.
///
/// This function validates that the provided `Bytes` instance contains valid UTF-8 data. If the
/// data is not valid UTF-8, an error is returned.
///
/// # Arguments
///
/// * `bytes` - A `tinybytes::Bytes` instance that will be converted into a `NoAllocString`.
///
/// # Returns
///
/// A `Result` containing the `NoAllocString` if the bytes are valid UTF-8, or a `Utf8Error` if
/// the bytes are not valid UTF-8.
///
/// # Errors
///
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
pub fn from_bytes(bytes: tinybytes::Bytes) -> Result<NoAllocString, std::str::Utf8Error> {
std::str::from_utf8(&bytes)?;
Ok(NoAllocString { bytes })
}

/// Creates a NoAllocString from a Bytes instance without validating the bytes are UTF-8.
/// Creates a `NoAllocString` from a `tinybytes::Bytes` instance without validating the bytes.
///
/// This function does not perform any validation on the provided bytes, and assumes that the
/// bytes are valid UTF-8. If the bytes are not valid UTF-8, the behavior is undefined.
///
/// # Arguments
///
/// * `bytes` - A `tinybytes::Bytes` instance that will be converted into a `NoAllocString`.
///
/// # Safety
///
/// This function is unsafe because it assumes the bytes are valid UTF-8. If the bytes are not
/// valid UTF-8, the behavior is undefined.
pub fn from_bytes_unchecked(bytes: tinybytes::Bytes) -> NoAllocString {
NoAllocString { bytes }
}

/// Safety: This is unsafe and assumes the bytes are valid UTF-8. When creating a NoAllocString
/// the bytes are validated to be UTF-8.
pub fn as_str(&self) -> &str {
/// Returns the string slice representation of the `NoAllocString`. The slice is checked to be
/// valid UTF-8. If you use `from_bytes` or `from_slice` this check was already performed and
/// you may want to use `as_str_unchecked` instead.
///
/// # Errors
///
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
pub fn as_str(&self) -> Result<&str, Utf8Error> {
std::str::from_utf8(&self.bytes)
}

/// Returns the string slice representation of the `NoAllocString` without validating the bytes.
/// Typically, you should use `from_slice` or `from_bytes` when creating a NoAllocString to
/// ensure the bytes are valid UTF-8 (if the bytes haven't already been validated by other
/// means) so further validation may be unnecessary.
///
/// # Safety
///
/// This function is unsafe because it assumes the bytes are valid UTF-8. If the bytes are not
/// valid UTF-8, the behavior is undefined.
pub unsafe fn as_str_unchecked(&self) -> &str {
// SAFETY: This is unsafe and assumes the bytes are valid UTF-8.
unsafe { std::str::from_utf8_unchecked(&self.bytes) }
}
Expand All @@ -77,7 +180,9 @@ impl Default for NoAllocString {

impl Borrow<str> for NoAllocString {
fn borrow(&self) -> &str {
self.as_str()
// This is safe because we have already validated that the bytes are valid UTF-8 when
// creating the NoAllocString.
unsafe { self.as_str_unchecked() }
}
}

Expand All @@ -90,7 +195,7 @@ mod tests {
fn test_from_slice() {
let slice = b"hello";
let no_alloc_string = NoAllocString::from_slice(slice).unwrap();
assert_eq!(no_alloc_string.as_str(), "hello");
assert_eq!(no_alloc_string.as_str().unwrap(), "hello");
}

#[test]
Expand All @@ -104,7 +209,7 @@ mod tests {
fn test_from_bytes() {
let bytes = tinybytes::Bytes::copy_from_slice(b"world");
let no_alloc_string = NoAllocString::from_bytes(bytes).unwrap();
assert_eq!(no_alloc_string.as_str(), "world");
assert_eq!(no_alloc_string.as_str().unwrap(), "world");
}

#[test]
Expand All @@ -118,13 +223,13 @@ mod tests {
fn test_from_bytes_unchecked() {
let bytes = tinybytes::Bytes::copy_from_slice(b"unchecked");
let no_alloc_string = NoAllocString::from_bytes_unchecked(bytes);
assert_eq!(no_alloc_string.as_str(), "unchecked");
assert_eq!(no_alloc_string.as_str().unwrap(), "unchecked");
}

#[test]
fn test_as_str() {
let no_alloc_string = NoAllocString::from_slice(b"test").unwrap();
assert_eq!(no_alloc_string.as_str(), "test");
assert_eq!(no_alloc_string.as_str().unwrap(), "test");
}

#[test]
Expand All @@ -137,7 +242,7 @@ mod tests {
#[test]
fn test_default() {
let no_alloc_string: NoAllocString = Default::default();
assert_eq!(no_alloc_string.as_str(), "");
assert_eq!(no_alloc_string.as_str().unwrap(), "");
}

#[test]
Expand Down

0 comments on commit f48e785

Please sign in to comment.