ParkMyCar · ParkMyCar · Feb 9, 2022 · Feb 5, 2022 · Feb 5, 2022 · Feb 5, 2022
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -9,6 +9,7 @@ name: CI
 
 env:
   RUSTFLAGS: "-D warnings"
+  PROPTEST_CASES: 10000
 
 jobs:
   check:
@@ -29,8 +30,6 @@ jobs:
   test:
     name: cargo test
     runs-on: ubuntu-latest
-    env:
-      PROPTEST_CASES: 500
     steps:
       - uses: actions/checkout@v2
       - uses: actions-rs/toolchain@v1
@@ -41,13 +40,11 @@ jobs:
       - uses: actions-rs/cargo@v1
         with:
           command: test
-          args: --all-features
+          args: --release --all-features -- --include-ignored
 
   test-nightly:
     name: cargo test nightly
     runs-on: ubuntu-latest
-    env:
-      PROPTEST_CASES: 500
     steps:
       - uses: actions/checkout@v2
       - uses: actions-rs/toolchain@v1
@@ -58,7 +55,7 @@ jobs:
       - uses: actions-rs/cargo@v1
         with:
           command: test
-          args: --all-features
+          args: --release --all-features -- --include-ignored
 
   miri:
     name: cargo miri test
@@ -73,7 +70,7 @@ jobs:
           components: miri
       - name: Run Miri
         run: |
-          cargo miri test
+          cargo miri test --all-features
 
   fuzz-creation:
     name: fuzz creation

diff --git a/.github/workflows/msrv.yml b/.github/workflows/msrv.yml
@@ -8,8 +8,6 @@ on:
 name: MSRV
 
 env:
-  # local default for proptest is 100
-  PROPTEST_CASES: 1000
   RUSTFLAGS: "-D warnings"
 
 jobs:

diff --git a/compact_str/Cargo.toml b/compact_str/Cargo.toml
@@ -38,3 +38,7 @@ harness = false
 [[bench]]
 name = "comparison"
 harness = false
+
+[[bench]]
+name = "random"
+harness = false
diff --git a/compact_str/benches/random.rs b/compact_str/benches/random.rs
@@ -0,0 +1,37 @@
+//! Random benchmarks to determine if one bit of code is faster than another
+
+use criterion::{
+    black_box,
+    criterion_group,
+    criterion_main,
+    Criterion,
+};
+
+fn if_statement_min(c: &mut Criterion) {
+    let mask = 192;
+    let vals: [u8; 4] = [0, 46, 202, 255];
+    c.bench_function("if statement min", |b| {
+        b.iter(|| {
+            for x in vals {
+                let len = if x >= mask { (x & !mask) as usize } else { 24 };
+                black_box(len);
+            }
+        })
+    });
+}
+
+fn cmp_min(c: &mut Criterion) {
+    let mask = 192;
+    let vals: [u8; 4] = [0, 46, 202, 255];
+    c.bench_function("cmp min", |b| {
+        b.iter(|| {
+            for x in vals {
+                let len = core::cmp::min(x.wrapping_sub(mask), 24);
+                black_box(len);
+            }
+        })
+    });
+}
+
+criterion_group!(random, if_statement_min, cmp_min);
+criterion_main!(random);
diff --git a/compact_str/fuzz/fuzz_targets/creation.rs b/compact_str/fuzz/fuzz_targets/creation.rs
@@ -27,9 +27,7 @@ fuzz_target!(|method: CreationMethod<'_>| {
             assert_eq!(compact, word);
 
             // assert the CompactStr is properly allocated
-            if word.len() < MAX_INLINE_LENGTH {
-                assert!(!compact.is_heap_allocated());
-            } else if word.len() == MAX_INLINE_LENGTH && word.as_bytes()[0] <= 127 {
+            if word.len() <= MAX_INLINE_LENGTH {
                 assert!(!compact.is_heap_allocated());
             } else {
                 assert!(compact.is_heap_allocated());
@@ -43,10 +41,7 @@ fuzz_target!(|method: CreationMethod<'_>| {
             assert_eq!(compact, std_str);
 
             // assert the CompactStr is properly allocated
-            //
-            // Note: Creating a CompactStr from an iterator doesn't yet support the Packed
-            // representation, so we can only inline MAX_INLINE_LENGTH - 1
-            if std_str.len() < MAX_INLINE_LENGTH {
+            if std_str.len() <= MAX_INLINE_LENGTH {
                 assert!(!compact.is_heap_allocated());
             } else {
                 assert!(compact.is_heap_allocated());
@@ -60,10 +55,7 @@ fuzz_target!(|method: CreationMethod<'_>| {
             assert_eq!(compact, std_str);
 
             // assert the CompactStr is properly allocated
-            //
-            // Note: Creating a CompactStr from an iterator doesn't yet support the Packed
-            // representation, so we can only inline MAX_INLINE_LENGTH - 1
-            if std_str.len() < MAX_INLINE_LENGTH {
+            if std_str.len() <= MAX_INLINE_LENGTH {
                 assert!(!compact.is_heap_allocated());
             } else {
                 assert!(compact.is_heap_allocated());
@@ -81,9 +73,7 @@ fuzz_target!(|method: CreationMethod<'_>| {
                     assert_eq!(c, s);
 
                     // assert the CompactStr is properly allocated
-                    if s.len() < MAX_INLINE_LENGTH {
-                        assert!(!c.is_heap_allocated());
-                    } else if s.len() == MAX_INLINE_LENGTH && s.as_bytes()[0] <= 127 {
+                    if s.len() <= MAX_INLINE_LENGTH {
                         assert!(!c.is_heap_allocated());
                     } else {
                         assert!(c.is_heap_allocated());

diff --git a/compact_str/src/features/bytes.rs b/compact_str/src/features/bytes.rs
@@ -74,7 +74,7 @@ mod test {
 
     use crate::CompactStr;
 
-    const MAX_INLINED_SIZE: usize = core::mem::size_of::<String>();
+    const MAX_SIZE: usize = core::mem::size_of::<String>();
 
     // generates random unicode strings, upto 80 chars long
     fn rand_unicode() -> impl Strategy<Value = String> {
@@ -98,9 +98,7 @@ mod test {
             let mut buf = Cursor::new(word.as_bytes());
             let compact = CompactStr::from_utf8_buf(&mut buf).unwrap();
 
-            if word.len() < MAX_INLINED_SIZE {
-                prop_assert!(!compact.is_heap_allocated())
-            } else if word.len() == MAX_INLINED_SIZE && word.as_bytes()[0] <= 127 {
+            if word.len() <= MAX_SIZE {
                 prop_assert!(!compact.is_heap_allocated())
             } else {
                 prop_assert!(compact.is_heap_allocated())

diff --git a/compact_str/src/lib.rs b/compact_str/src/lib.rs
@@ -9,7 +9,7 @@
 //! 3. A `usize` denoting the capacity of the string
 //!
 //! On 64-bit architectures this results in 24 bytes being stored on the stack (12 bytes for 32-bit
-//! architectures). For small strings, e.g. <= 23 characters, instead of storing a pointer, length,
+//! architectures). For small strings, e.g. <= 24 characters, instead of storing a pointer, length,
 //! and capacity on the stack, you store the string itself! This avoids the need to heap allocate
 //! which reduces the amount of memory used, and improves performance.
 
@@ -165,7 +165,7 @@ impl CompactStr {
     /// ```
     /// # use compact_str::CompactStr;
     /// let empty = CompactStr::with_capacity(0);
-    /// let min_size = std::mem::size_of::<String>() - 1;
+    /// let min_size = std::mem::size_of::<String>();
     ///
     /// assert_eq!(empty.capacity(), min_size);
     /// assert_ne!(0, min_size);
@@ -175,10 +175,10 @@ impl CompactStr {
     /// ### Heap Allocating
     /// ```
     /// # use compact_str::CompactStr;
-    /// // If you create a `CompactStr` with a capacity greater than or equal to
+    /// // If you create a `CompactStr` with a capacity greater than
     /// // `std::mem::size_of::<String>`, it will heap allocated
     ///
-    /// let heap_size = std::mem::size_of::<String>();
+    /// let heap_size = std::mem::size_of::<String>() + 1;
     /// let empty = CompactStr::with_capacity(heap_size);
     ///
     /// assert_eq!(empty.capacity(), heap_size);
@@ -316,7 +316,7 @@ impl CompactStr {
     /// ```
     #[inline]
     pub fn as_bytes(&self) -> &[u8] {
-        self.repr.as_slice()
+        &self.repr.as_slice()[..self.len()]
     }
 
     // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning?

diff --git a/compact_str/src/repr/bytes.rs b/compact_str/src/repr/bytes.rs
@@ -4,24 +4,18 @@ use bytes::Buf;
 
 use super::{
     Repr,
+    HEAP_MASK,
     MAX_SIZE,
 };
 
-#[cfg(target_pointer_width = "32")]
-const DEFAULT_TEXT: &str = "000000000000";
-#[cfg(target_pointer_width = "64")]
-const DEFAULT_TEXT: &str = "000000000000000000000000";
-
-const DEFAULT_PACKED: Repr = Repr::new_const(DEFAULT_TEXT);
-
 impl Repr {
     /// Converts a buffer of bytes to a `Repr`,
     pub fn from_utf8_buf<B: Buf>(buf: &mut B) -> Result<Self, Utf8Error> {
         // SAFETY: We check below to make sure the provided buffer is valid UTF-8
-        let repr = unsafe { Self::from_utf8_buf_unchecked(buf) };
+        let (repr, bytes_written) = unsafe { Self::collect_buf(buf) };
 
         // Check to make sure the provided bytes are valid UTF-8, return the Repr if they are!
-        match core::str::from_utf8(repr.as_slice()) {
+        match core::str::from_utf8(&repr.as_slice()[..bytes_written]) {
             Ok(_) => Ok(repr),
             Err(e) => Err(e),
         }
@@ -32,46 +26,53 @@ impl Repr {
     /// # Safety
     /// The provided buffer must be valid UTF-8
     pub unsafe fn from_utf8_buf_unchecked<B: Buf>(buf: &mut B) -> Self {
-        let size = buf.remaining();
-        let chunk = buf.chunk();
-
-        // Check to make sure we're not empty, so accessing the first byte below doesn't panic
-        if chunk.is_empty() {
-            // If the chunk is empty, then we should have 0 remaining bytes
-            debug_assert_eq!(size, 0);
-            return super::EMPTY;
+        let (repr, _bytes_written) = Self::collect_buf(buf);
+        repr
+    }
+
+    unsafe fn collect_buf<B: Buf>(buf: &mut B) -> (Self, usize) {
+        // Get an empty Repr we can write into
+        let mut repr = super::EMPTY;
+        let mut bytes_written = 0;
+
+        debug_assert_eq!(repr.len(), bytes_written);
+
+        while buf.has_remaining() {
+            let chunk = buf.chunk();
+            let chunk_len = chunk.len();
+
+            // There's an edge case where the final byte of this buffer == `HEAP_MASK`, which is
+            // invalid UTF-8, but would result in us creating an inline variant, that identifies as
+            // a heap variant. If a user ever tried to reference the data at all, we'd incorrectly
+            // try and read data from an invalid memory address, causing undefined behavior.
+            if bytes_written < MAX_SIZE && bytes_written + chunk_len == MAX_SIZE {
+                let last_byte = chunk[chunk_len - 1];
+                // If we hit the edge case, reserve additional space to make the repr becomes heap
+                // allocated, which prevents us from writing this last byte inline
+                if last_byte == HEAP_MASK {
+                    repr.reserve(MAX_SIZE + 1);
+                }
+            }
+
+            // reserve at least enough space to fit this chunk
+            repr.reserve(chunk_len);
+
+            // SAFETY: The caller is responsible for making sure the provided buffer is UTF-8. This
+            // invariant is documented in the public API
+            let slice = repr.as_mut_slice();
+            // write the chunk into the Repr
+            slice[bytes_written..bytes_written + chunk_len].copy_from_slice(chunk);
+
+            // Set the length of the Repr
+            // SAFETY: We just wrote an additional `chunk_len` bytes into the Repr
+            bytes_written += chunk_len;
+            repr.set_len(bytes_written);
+
+            // advance the pointer of the buffer
+            buf.advance(chunk_len);
         }
-        let first_byte = buf.chunk()[0];
-
-        // Get an "empty" Repr we can write into
-        //
-        // HACK: There currently isn't a way to provide an "empty" Packed repr, so we do this check
-        // and return a "default" Packed repr if the buffer can fit
-        let mut repr = if size == MAX_SIZE && first_byte <= 127 {
-            // Note: No need to reserve additional bytes here, because we know we can fit all
-            // remaining bytes of `buf` into a Packed repr
-            DEFAULT_PACKED
-        } else {
-            let mut default = super::EMPTY;
-            debug_assert_eq!(default.len(), 0);
-
-            // Reserve enough bytes, possibly allocating on the heap, to store the text
-            default.reserve(size);
-
-            default
-        };
-
-        // SAFETY: The caller is responsible for making sure the provided buffer is UTF-8. This
-        // invariant is documented in the public API
-        let slice = repr.as_mut_slice();
-        // Copy the bytes from the buffer into our Repr!
-        buf.copy_to_slice(&mut slice[..size]);
-
-        // Set the length of the Repr
-        // SAFETY: We just wrote `size` bytes into the Repr
-        repr.set_len(size);
 
-        repr
+        (repr, bytes_written)
     }
 }
 
@@ -124,6 +125,36 @@ mod test {
         assert!(!repr.is_heap_allocated());
     }
 
+    #[test]
+    fn test_fuzz_panic() {
+        let bytes = &[
+            255, 255, 255, 255, 255, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 12, 0, 0, 96,
+        ];
+        let mut buf: Cursor<&[u8]> = Cursor::new(bytes);
+
+        assert!(Repr::from_utf8_buf(&mut buf).is_err());
+    }
+
+    #[test]
+    fn test_valid_repr_but_invalid_utf8() {
+        let bytes = &[
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192,
+        ];
+        let mut buf: Cursor<&[u8]> = Cursor::new(bytes);
+
+        assert!(Repr::from_utf8_buf(&mut buf).is_err());
+    }
+
+    #[test]
+    fn test_fake_heap_variant() {
+        let bytes = &[
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255,
+        ];
+        let mut buf: Cursor<&[u8]> = Cursor::new(bytes);
+
+        assert!(Repr::from_utf8_buf(&mut buf).is_err());
+    }
+
     #[test]
     #[should_panic(expected = "Utf8Error")]
     fn test_invalid_utf8() {