ParkMyCar · ParkMyCar · Jan 9, 2022 · Jan 3, 2022 · Jan 3, 2022 · Jan 3, 2022
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -78,6 +78,21 @@ jobs:
         run: |
           cargo miri test
 
+  example-bytes:
+    name: example - bytes
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: nightly
+          override: true
+      - uses: actions-rs/cargo@v1
+        with:
+          command: run
+          args: --manifest-path examples/bytes/Cargo.toml
+
   example-serde:
     name: example - serde
     runs-on: ubuntu-latest

diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,2 @@
-/target
-Cargo.lock
+**/target
+**/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,2 +1,2 @@
 [workspace]
-members = ["examples/serde", "compact_str", "tracing_alloc"]
+members = ["examples/bytes", "examples/serde", "compact_str", "tracing_alloc"]
diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml
@@ -14,6 +14,7 @@ categories = ["encoding", "parsing", "memory-management", "text-processing"]
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+bytes = { version = "1", optional = true }
 serde = { version = "1", optional = true }
 static_assertions = "1"
 

diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs
@@ -0,0 +1,125 @@
+use core::str::Utf8Error;
+
+use bytes::Buf;
+
+use crate::{
+    CompactStr,
+    Repr,
+};
+
+impl CompactStr {
+    /// Converts a buffer of bytes to a `CompactStr`
+    ///
+    /// # Examples
+    /// ### Basic usage
+    /// ```
+    /// # use compact_str::CompactStr;
+    /// # use std::collections::VecDeque;
+    ///
+    /// // `bytes::Buf` is implemented for `VecDeque<u8>`
+    /// let mut sparkle_heart = VecDeque::from(vec![240, 159, 146, 150]);
+    /// // We know these bytes are valid, so we can `.unwrap()` or `.expect(...)` here
+    /// let compact_str = CompactStr::from_utf8_buf(&mut sparkle_heart).expect("valid utf-8");
+    ///
+    /// assert_eq!(compact_str, "💖");
+    /// ```
+    ///
+    /// ### With invalid/non-UTF8 bytes
+    /// ```
+    /// # use compact_str::CompactStr;
+    /// # use std::io;
+    ///
+    /// // `bytes::Buf` is implemented for `std::io::Cursor<&[u8]>`
+    /// let mut invalid = io::Cursor::new(&[0, 159]);
+    ///
+    /// // The provided buffer is invalid, so trying to create a `CompactStr` will fail
+    /// assert!(CompactStr::from_utf8_buf(&mut invalid).is_err());
+    /// ```
+    pub fn from_utf8_buf<B: Buf>(buf: &mut B) -> Result<Self, Utf8Error> {
+        Repr::from_utf8_buf(buf).map(|repr| CompactStr { repr })
+    }
+
+    /// Converts a buffer of bytes to a `CompactStr`, without checking that the provided buffer is
+    /// valid UTF-8.
+    ///
+    /// # Safety
+    /// This function is unsafe because it does not check that the provided bytes are valid UTF-8.
+    /// If this constraint is violated, it may cause memory unsafety issues with future uses of
+    /// the `CompactStr`, as the rest of the library assumes that `CompactStr`s are valid UTF-8.
+    ///
+    /// # Examples
+    /// ```
+    /// # use compact_str::CompactStr;
+    /// # use std::io;
+    ///
+    /// let word = "hello world";
+    /// // `bytes::Buf` is implemented for `std::io::Cursor<&[u8]>`
+    /// let mut buffer = io::Cursor::new(word.as_bytes());
+    /// let compact_str = unsafe { CompactStr::from_utf8_buf_unchecked(&mut buffer) };
+    ///
+    /// assert_eq!(compact_str, word);
+    /// ```
+    pub unsafe fn from_utf8_buf_unchecked<B: Buf>(buf: &mut B) -> Self {
+        let repr = Repr::from_utf8_buf_unchecked(buf);
+        CompactStr { repr }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::io::Cursor;
+
+    use proptest::prelude::*;
+    use proptest::strategy::Strategy;
+
+    use crate::CompactStr;
+
+    const MAX_INLINED_SIZE: usize = core::mem::size_of::<String>();
+
+    // generates random unicode strings, fewer than 80 chars long
+    fn rand_unicode() -> impl Strategy<Value = String> {
+        proptest::collection::vec(proptest::char::any(), 0..80)
+            .prop_map(|v| v.into_iter().collect())
+    }
+
+    proptest! {
+        #[test]
+        #[cfg_attr(miri, ignore)]
+        fn test_buffers_roundtrip(word in rand_unicode()) {
+            let mut buf = Cursor::new(word.as_bytes());
+            let compact = CompactStr::from_utf8_buf(&mut buf).unwrap();
+
+            prop_assert_eq!(&word, &compact);
+        }
+
+        #[test]
+        #[cfg_attr(miri, ignore)]
+        fn test_allocated_properly(word in rand_unicode()) {
+            let mut buf = Cursor::new(word.as_bytes());
+            let compact = CompactStr::from_utf8_buf(&mut buf).unwrap();
+
+            if word.len() < MAX_INLINED_SIZE {
+                prop_assert!(!compact.is_heap_allocated())
+            } else if word.len() == MAX_INLINED_SIZE && word.as_bytes()[0] <= 127 {
+                prop_assert!(!compact.is_heap_allocated())
+            } else {
+                prop_assert!(compact.is_heap_allocated())
+            }
+        }
+
+        #[test]
+        #[cfg_attr(miri, ignore)]
+        fn test_only_accept_valid_utf8(bytes in proptest::collection::vec(any::<u8>(), 0..80)) {
+            let mut buf = Cursor::new(bytes.as_slice());
+
+            let compact_result = CompactStr::from_utf8_buf(&mut buf);
+            let str_result = core::str::from_utf8(bytes.as_slice());
+
+            match (compact_result, str_result) {
+                (Ok(c), Ok(s)) => prop_assert_eq!(c, s),
+                (Err(c_err), Err(s_err)) => prop_assert_eq!(c_err, s_err),
+                _ => panic!("CompactStr and core::str read UTF-8 differently?"),
+            }
+        }
+    }
+}
diff --git a/compact_str/src/features/mod.rs b/compact_str/src/features/mod.rs
@@ -0,0 +1,6 @@
+//! A module that contains the implementations for optional features, for example `serde` support.
+
+#[cfg(feature = "bytes")]
+mod bytes;
+#[cfg(feature = "serde")]
+mod serde;
diff --git a/compact_str/src/serde.rs → compact_str/src/features/serde.rs b/compact_str/src/serde.rs → compact_str/src/features/serde.rs
@@ -7,7 +7,7 @@ use serde::de::{
     Visitor,
 };
 
-use super::CompactStr;
+use crate::CompactStr;
 
 fn compact_str<'de: 'a, 'a, D: Deserializer<'de>>(deserializer: D) -> Result<CompactStr, D::Error> {
     struct CompactStrVisitor;

diff --git a/compact_str/src/lib.rs b/compact_str/src/lib.rs
@@ -22,12 +22,11 @@ use core::iter::FromIterator;
 use core::ops::Deref;
 use core::str::FromStr;
 
+mod features;
+
 mod repr;
 use repr::Repr;
 
-#[cfg(feature = "serde")]
-mod serde;
-
 #[cfg(test)]
 mod tests;
 
@@ -134,6 +133,11 @@ impl CompactStr {
         self.repr.as_str()
     }
 
+    #[inline]
+    pub fn as_slice(&self) -> &[u8] {
+        self.repr.as_slice()
+    }
+
     // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning?
     //
     /// Provides a mutable reference to the underlying buffer of bytes.

diff --git a/compact_str/src/repr/bytes.rs b/compact_str/src/repr/bytes.rs
@@ -0,0 +1,135 @@
+use core::str::Utf8Error;
+
+use bytes::Buf;
+
+use super::{
+    Repr,
+    MAX_SIZE,
+};
+
+#[cfg(target_pointer_width = "32")]
+const DEFAULT_TEXT: &str = "000000000000";
+#[cfg(target_pointer_width = "64")]
+const DEFAULT_TEXT: &str = "000000000000000000000000";
+
+const DEFAULT_PACKED: Repr = Repr::new_const(DEFAULT_TEXT);
+
+impl Repr {
+    /// Converts a buffer of bytes to a `Repr`, returning an error if the bytes are not valid UTF-8
+    pub fn from_utf8_buf<B: Buf>(buf: &mut B) -> Result<Self, Utf8Error> {
+        // SAFETY: We check below to make sure the provided buffer is valid UTF-8
+        let repr = unsafe { Self::from_utf8_buf_unchecked(buf) };
+
+        // Check to make sure the provided bytes are valid UTF-8, return the Repr if they are!
+        match core::str::from_utf8(repr.as_slice()) {
+            Ok(_) => Ok(repr),
+            Err(e) => Err(e),
+        }
+    }
+
+    /// Converts a buffer of bytes to a `Repr`, without checking for valid UTF-8
+    ///
+    /// # Safety
+    /// The provided buffer must be valid UTF-8
+    pub unsafe fn from_utf8_buf_unchecked<B: Buf>(buf: &mut B) -> Self {
+        let size = buf.remaining();
+        let chunk = buf.chunk();
+
+        // Check to make sure we're not empty, so accessing the first byte below doesn't panic
+        if chunk.is_empty() {
+            // If the chunk is empty, then we should have 0 remaining bytes
+            debug_assert_eq!(size, 0);
+            return super::EMPTY;
+        }
+        let first_byte = buf.chunk()[0];
+
+        // Get an "empty" Repr we can write into
+        //
+        // HACK: There currently isn't a way to provide an "empty" Packed repr, so we do this check
+        // and return a "default" Packed repr if the buffer can fit
+        let mut repr = if size == MAX_SIZE && first_byte <= 127 {
+            // Note: No need to reserve additional bytes here, because we know we can fit all
+            // remaining bytes of `buf` into a Packed repr
+            DEFAULT_PACKED
+        } else {
+            let mut default = super::EMPTY;
+            debug_assert_eq!(default.len(), 0);
+
+            // Reserve enough bytes, possibly allocating on the heap, to store the text
+            default.reserve(size);
+
+            default
+        };
+
+        // SAFETY: The caller is responsible for making sure the provided buffer is UTF-8. This
+        // invariant is documented in the public API
+        let slice = repr.as_mut_slice();
+        // Copy the bytes from the buffer into our Repr!
+        buf.copy_to_slice(&mut slice[..size]);
+
+        // Set the length of the Repr
+        // SAFETY: We just wrote `size` bytes into the Repr
+        repr.set_len(size);
+
+        repr
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::io::Cursor;
+
+    use super::Repr;
+
+    #[test]
+    fn test_smoke() {
+        let word = "hello world";
+        let mut buf = Cursor::new(word.as_bytes());
+
+        let repr = Repr::from_utf8_buf(&mut buf).unwrap();
+        assert_eq!(repr.as_str(), word);
+    }
+
+    #[test]
+    fn test_heap_allocated() {
+        let word = "hello, this is a long string which should be heap allocated";
+        let mut buf = Cursor::new(word.as_bytes());
+
+        let repr = Repr::from_utf8_buf(&mut buf).unwrap();
+        assert_eq!(repr.as_str(), word);
+    }
+
+    #[test]
+    fn test_empty() {
+        let mut buf: Cursor<&[u8]> = Cursor::new(&[]);
+
+        let repr = Repr::from_utf8_buf(&mut buf).unwrap();
+        assert_eq!(repr.len(), 0);
+        assert_eq!(repr.as_str(), "");
+    }
+
+    #[test]
+    fn test_packed() {
+        #[cfg(target_pointer_width = "64")]
+        let packed = "this string is 24 chars!";
+        #[cfg(target_pointer_width = "32")]
+        let packed = "i am 12 char";
+
+        let mut buf = Cursor::new(packed.as_bytes());
+
+        let repr = Repr::from_utf8_buf(&mut buf).unwrap();
+        assert_eq!(repr.as_str(), packed);
+
+        // This repr should __not__ be heap allocated
+        assert!(!repr.is_heap_allocated());
+    }
+
+    #[test]
+    #[should_panic(expected = "Utf8Error")]
+    fn test_invalid_utf8() {
+        let invalid = &[0, 159];
+        let mut buf: Cursor<&[u8]> = Cursor::new(invalid);
+
+        Repr::from_utf8_buf(&mut buf).unwrap();
+    }
+}
diff --git a/compact_str/src/repr/heap/arc.rs b/compact_str/src/repr/heap/arc.rs
@@ -22,6 +22,8 @@ pub struct ArcString {
     len: usize,
     ptr: ptr::NonNull<ArcStringInner>,
 }
+unsafe impl Sync for ArcString {}
+unsafe impl Send for ArcString {}
 
 impl ArcString {
     #[inline]
@@ -54,11 +56,14 @@ impl ArcString {
 
     #[inline]
     pub fn as_str(&self) -> &str {
-        let buffer = self.inner().as_bytes();
-
         // SAFETY: The only way you can construct an `ArcString` is via a `&str` so it must be valid
         // UTF-8, or the caller has manually made those guarantees
-        unsafe { str::from_utf8_unchecked(&buffer[..self.len]) }
+        unsafe { str::from_utf8_unchecked(self.as_slice()) }
+    }
+
+    #[inline(always)]
+    pub fn as_slice(&self) -> &[u8] {
+        &self.inner().as_bytes()[..self.len]
     }
 
     #[inline]

diff --git a/compact_str/src/repr/inline.rs b/compact_str/src/repr/inline.rs
@@ -81,11 +81,13 @@ impl InlineString {
 
     #[inline]
     pub fn as_str(&self) -> &str {
-        let len = self.len();
-        let slice = &self.buffer[..len];
-
         // SAFETY: You can only construct an InlineString via a &str
-        unsafe { ::std::str::from_utf8_unchecked(slice) }
+        unsafe { ::std::str::from_utf8_unchecked(self.as_slice()) }
+    }
+
+    #[inline(always)]
+    pub fn as_slice(&self) -> &[u8] {
+        &self.buffer[..self.len()]
     }
 
     /// Provides a mutable reference to the underlying buffer