From 2750498954ef7ab4ebc8c786b091d11b027121d9 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Wed, 15 Feb 2023 22:48:05 -0500 Subject: [PATCH 1/3] branch start, make From and From> eagerly inline, add from_string_buffer API --- compact_str/src/lib.rs | 84 +++++++++++++++++++++++++- compact_str/src/repr/iter.rs | 6 +- compact_str/src/repr/mod.rs | 112 ++++++++++++++++++++++++----------- fuzz/src/creation.rs | 37 ++++++------ 4 files changed, 182 insertions(+), 57 deletions(-) diff --git a/compact_str/src/lib.rs b/compact_str/src/lib.rs index 95405f72..37a10663 100644 --- a/compact_str/src/lib.rs +++ b/compact_str/src/lib.rs @@ -1379,6 +1379,85 @@ impl CompactString { pub fn into_string(self) -> String { self.0.into_string() } + + /// Convert a [`String`] into a [`CompactString`] _without inlining_. + /// + /// Note: You probably don't need to use this method, instead you should use `From` + /// which is implemented for [`CompactString`]. + /// + /// This method exists incase your code is very sensitive to memory allocations. Normally when + /// converting a [`String`] to a [`CompactString`] we'll inline short strings onto the stack. + /// But this results in [`Drop`]-ing the original [`String`], which causes memory it owned on + /// the heap to be deallocated. Instead when using this method, we always reuse the buffer that + /// was previously owned by the [`String`], so no trips to the allocator are needed. + /// + /// # Examples + /// + /// ### Short Strings + /// ``` + /// use compact_str::CompactString; + /// + /// let x = "hello world".to_string(); + /// let c_inline = CompactString::from(x); + /// + /// // using From, we create an inlined CompactString + /// assert!(!c_inline.is_heap_allocated()); + /// + /// let y = "hello world".to_string(); + /// let c_heap = CompactString::from_string_buffer(y); + /// + /// // using CompactString::from_string_buffer, we'll re-use the String's underlying buffer + /// assert!(c_heap.is_heap_allocated()); + /// ``` + /// + /// ### Longer Strings + /// ``` + /// use compact_str::CompactString; + /// + /// let x = "longer string that will be on the heap".to_string(); + /// let c1 = CompactString::from(x); + /// + /// let y = "longer string that will be on the heap".to_string(); + /// let c2 = CompactString::from_string_buffer(y); + /// + /// // for longer strings, we re-use the underlying String's buffer in both cases + /// assert!(c1.is_heap_allocated()); + /// assert!(c2.is_heap_allocated()); + /// ``` + /// + /// ### `Clone`-ing + /// ``` + /// use compact_str::CompactString; + /// + /// let x = "hello world".to_string(); + /// let c_heap = CompactString::from_string_buffer(x); + /// assert!(c_heap.is_heap_allocated()); + /// + /// // when cloning, if a string is heap allocated but short, we'll inline the cloned version + /// let c_inline = c_heap.clone(); + /// assert!(!c_inline.is_heap_allocated()); + /// ``` + /// + /// ### Buffer Re-use + /// ``` + /// use compact_str::CompactString; + /// + /// let og = "hello world".to_string(); + /// let og_addr = og.as_ptr(); + /// + /// let c = CompactString::from_string_buffer(og); + /// let ex = String::from(c); + /// let ex_addr = ex.as_ptr(); + /// + /// // When converting to/from String and CompactString, we re-use the same underlying allocated + /// // memory/buffer + /// assert_eq!(og_addr, ex_addr); + /// ``` + #[inline] + pub fn from_string_buffer(s: String) -> Self { + let repr = Repr::from_string(s, false); + CompactString(repr) + } } impl Default for CompactString { @@ -1492,7 +1571,7 @@ impl<'a> From<&'a str> for CompactString { impl From for CompactString { fn from(s: String) -> Self { - let repr = Repr::from_string(s); + let repr = Repr::from_string(s, true); CompactString(repr) } } @@ -1507,6 +1586,7 @@ impl<'a> From> for CompactString { fn from(cow: Cow<'a, str>) -> Self { match cow { Cow::Borrowed(s) => s.into(), + // we separate these two so we can re-use the underlying buffer in the owned case Cow::Owned(s) => s.into(), } } @@ -1514,7 +1594,7 @@ impl<'a> From> for CompactString { impl From> for CompactString { fn from(b: Box) -> Self { - let repr = Repr::from_box_str(b); + let repr = Repr::from_box_str(b, true); CompactString(repr) } } diff --git a/compact_str/src/repr/iter.rs b/compact_str/src/repr/iter.rs index 18bb7771..e25efd3c 100644 --- a/compact_str/src/repr/iter.rs +++ b/compact_str/src/repr/iter.rs @@ -17,7 +17,7 @@ impl FromIterator for Repr { // If the size hint indicates we can't store this inline, then create a heap string let (size_hint, _) = iter.size_hint(); if size_hint > MAX_SIZE { - return Repr::from_string(iter.collect()); + return Repr::from_string(iter.collect(), true); } // Otherwise, continuously pull chars from the iterator @@ -41,7 +41,7 @@ impl FromIterator for Repr { // extend heap with remaining characters string.extend(iter); - return Repr::from_string(string); + return Repr::from_string(string, true); } // write the current char into a slice of the unoccupied space @@ -91,7 +91,7 @@ where // extend heap with remaining strings string.extend(iter); - return Repr::from_string(string); + return Repr::from_string(string, true); } // write the current string into a slice of the unoccupied space diff --git a/compact_str/src/repr/mod.rs b/compact_str/src/repr/mod.rs index 84fc7428..ef1eccd9 100644 --- a/compact_str/src/repr/mod.rs +++ b/compact_str/src/repr/mod.rs @@ -140,12 +140,13 @@ impl Repr { repr } - /// Create a [`Repr`] from a [`String`], in `O(1)` time. + /// Create a [`Repr`] from a [`String`], in `O(1)` time. We'll attempt to inline the string + /// if `should_inline` is `true` /// /// Note: If the provided [`String`] is >16 MB and we're on a 32-bit arch, we'll copy the /// `String`. #[inline] - pub fn from_string(s: String) -> Self { + pub fn from_string(s: String, should_inline: bool) -> Self { let og_cap = s.capacity(); let cap = Capacity::new(og_cap); @@ -165,9 +166,14 @@ impl Repr { // We only hit this case if the provided String is > 16MB and we're on a 32-bit arch. We // expect it to be unlikely, thus we hint that to the compiler capacity_on_heap(s) - } else if og_cap == 0 { + } else if s.is_empty() { // We don't expect converting from an empty String often, so we make this code path cold empty() + } else if should_inline && s.len() <= MAX_SIZE { + // SAFETY: Checked to make sure the length of s will fit inline + let inline = unsafe { InlineBuffer::new(s.as_str()) }; + // SAFETY: `InlineBuffer` and `Repr` are the same size + unsafe { mem::transmute(inline) } } else { let mut s = mem::ManuallyDrop::new(s.into_bytes()); let len = s.len(); @@ -232,7 +238,7 @@ impl Repr { } #[inline] - pub fn from_box_str(s: Box) -> Self { + pub fn from_box_str(s: Box, should_inline: bool) -> Self { let og_cap = s.len(); let cap = Capacity::new(og_cap); @@ -255,6 +261,11 @@ impl Repr { } else if og_cap == 0 { // We don't expect converting from an empty String often, so we make this code path cold empty() + } else if should_inline && og_cap <= MAX_SIZE { + // SAFETY: Checked to make sure the length of s will fit inline + let inline = unsafe { InlineBuffer::new(&s) }; + // SAFETY: `InlineBuffer` and `Repr` are the same size + unsafe { mem::transmute(inline) } } else { // Don't drop the box here let raw_ptr = Box::into_raw(s).cast::(); @@ -687,21 +698,46 @@ mod tests { } } - #[test_case(String::new(); "empty")] - #[test_case(String::from("nyc 🗽"); "short")] - #[test_case(String::from("this is a really long string, which is intended"); "long")] - fn test_from_string(s: String) { - let r = Repr::from_string(s.clone()); + #[test_case(String::new(), true; "empty should inline")] + #[test_case(String::new(), false; "empty not inline")] + #[test_case(String::from("nyc 🗽"), true; "short should inline")] + #[test_case(String::from("nyc 🗽"), false ; "short not inline")] + #[test_case(String::from("this is a really long string, which is intended"), true; "long")] + #[test_case(String::from("this is a really long string, which is intended"), false; "long not inline")] + fn test_from_string(s: String, try_to_inline: bool) { + let r = Repr::from_string(s.clone(), try_to_inline); + assert_eq!(r.len(), s.len()); assert_eq!(r.as_str(), s.as_str()); + + if s.is_empty() { + // we should always inline the string, if the length of the source string is 0 + assert!(!r.is_heap_allocated()); + } else if s.len() <= MAX_SIZE { + // we should inline the string, if we were asked to + assert_eq!(!r.is_heap_allocated(), try_to_inline); + } else { + assert!(r.is_heap_allocated()); + } } #[quickcheck] #[cfg_attr(miri, ignore)] - fn quickcheck_from_string(s: String) { - let r = Repr::from_string(s.clone()); + fn quickcheck_from_string(s: String, try_to_inline: bool) { + let r = Repr::from_string(s.clone(), try_to_inline); + assert_eq!(r.len(), s.len()); assert_eq!(r.as_str(), s.as_str()); + + if s.is_empty() { + // we should always inline the string, if the length of the source string is 0 + assert!(!r.is_heap_allocated()); + } else if s.len() <= MAX_SIZE { + // we should inline the string, if we were asked to + assert_eq!(!r.is_heap_allocated(), try_to_inline); + } else { + assert!(r.is_heap_allocated()); + } } #[test_case(""; "empty")] @@ -833,35 +869,41 @@ mod tests { assert_eq!(r_a.is_heap_allocated(), r_b.is_heap_allocated()); } - #[test_case("q"; "single")] - #[test_case("abc"; "short")] - #[test_case("this is (another) long string that will be heap allocated"; "long")] - #[test_case(EIGHTEEN_MB_STR; "huge")] - fn test_from_box_str(initial: &'static str) { + #[test_case("", true ; "empty should inline")] + #[test_case("", false ; "empty not inline")] + #[test_case("q", true ; "single should inline")] + #[test_case("q", false ; "single not inline")] + #[test_case("abc", true ; "short inline")] + #[test_case("abc", false ; "short not inline")] + #[test_case("this is (another) long string that will be heap allocated", true ; "long should inline")] + #[test_case("this is (another) long string that will be heap allocated", false ; "long not inline")] + #[test_case(EIGHTEEN_MB_STR, true ; "huge should inline")] + #[test_case(EIGHTEEN_MB_STR, false ; "huge not inline")] + fn test_from_box_str(initial: &'static str, try_to_inline: bool) { let box_str = String::from(initial).into_boxed_str(); - let r = Repr::from_box_str(box_str); + let r = Repr::from_box_str(box_str, try_to_inline); assert_eq!(r.as_str(), initial); assert_eq!(r.len(), initial.len()); - assert_eq!(r.capacity(), initial.len()); - - // when converting from a Box we do not automatically inline the string - assert!(r.is_heap_allocated()); - } - - #[test] - fn test_from_box_str_empty() { - let box_str = String::from("").into_boxed_str(); - - let r = Repr::from_box_str(box_str); - assert_eq!(r.as_str(), ""); - assert_eq!(r.len(), 0); - - // when converting from a Box we do not automatically inline the string, unless the - // Box is empty, then we return an empty inlined string - assert_eq!(r.capacity(), MAX_SIZE); - assert!(!r.is_heap_allocated()); + // when converting from a Box we optionally inline the string, if it would fit + if initial.is_empty() { + // we should always inline the string, if the length of the source string is 0 + assert!(!r.is_heap_allocated()); + assert_eq!(r.capacity(), MAX_SIZE); + } else if initial.len() <= MAX_SIZE { + // we should inline the string, if we were asked to + assert_eq!(!r.is_heap_allocated(), try_to_inline); + let expected_capacity = if try_to_inline { + MAX_SIZE + } else { + initial.len() + }; + assert_eq!(r.capacity(), expected_capacity); + } else { + assert!(r.is_heap_allocated()); + assert_eq!(r.capacity(), initial.len()); + } } } diff --git a/fuzz/src/creation.rs b/fuzz/src/creation.rs index e1d9c70a..57814657 100644 --- a/fuzz/src/creation.rs +++ b/fuzz/src/creation.rs @@ -65,11 +65,13 @@ pub enum Creation<'a> { FromStr(&'a str), /// Create using `FromStr` trait FromStrTrait(&'a str), - /// Create using `From`, which consumes the `String` for `O(1)` runtime + /// Create using `From`, which consumes the `String` and eagerly inlines FromString(String), - /// Create using `From>`, which consumes the `Box` for `O(1)` runtime + /// Create using [`CompactString::from_string_buffer`], which consumes the `String` in `O(1)` + FromStringBuffer(String), + /// Create using `From>`, which consumes the `Box` and eagerly inlines FromBoxStr(Box), - /// Create using `From>`, which possibly consumes an owned string in `O(1)` + /// Create using `From>`, which consumes an owned string and eagerly inlines FromCowStr(CowStrArg<'a>), /// Create from a type that implements [`ToCompactString`] ToCompactString(ToCompactStringArg), @@ -244,10 +246,19 @@ impl Creation<'_> { FromString(s) => { let compact = CompactString::from(s.clone()); + // Note: converting From will be eagerly inlined assert_eq!(compact, s); + assert_properly_allocated(&compact, &s); - // Note: converting From will always be heap allocated because we use the - // underlying buffer from the source String + Some((compact, s)) + } + FromStringBuffer(s) => { + let compact = CompactString::from_string_buffer(s.clone()); + + assert_eq!(compact, s); + + // Note: converting with from_string_buffer will always be heap allocated because we + // use the underlying buffer from the source String if s.capacity() == 0 { assert!(!compact.is_heap_allocated()); } else { @@ -258,18 +269,12 @@ impl Creation<'_> { } FromBoxStr(b) => { let compact = CompactString::from(b.clone()); - assert_eq!(compact, b); - // Note: converting From> will always be heap allocated because we use the - // underlying buffer from the source String - if b.len() == 0 { - assert!(!compact.is_heap_allocated()) - } else { - assert!(compact.is_heap_allocated()) - } - + // Note: converting From> will be eagerly inlined let string = String::from(b); + assert_properly_allocated(&compact, &string); + Some((compact, string)) } FromCowStr(cow_arg) => { @@ -291,9 +296,7 @@ impl Creation<'_> { let compact = CompactString::from(cow); assert_eq!(compact, std_str); - // Note: we don't assert properly allocated here because we might do an O(1) - // conversion from String, if the Cow is owned, and thus could end up with a small - // string on the heap + assert_properly_allocated(&compact, &std_str); Some((compact, std_str)) } From 6576db2da5ad5ff924b69afde68c9140fed919a8 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Thu, 16 Feb 2023 09:31:41 -0500 Subject: [PATCH 2/3] update unit tests --- compact_str/src/tests.rs | 107 ++++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 25 deletions(-) diff --git a/compact_str/src/tests.rs b/compact_str/src/tests.rs index d1abc2c9..2ac5a462 100644 --- a/compact_str/src/tests.rs +++ b/compact_str/src/tests.rs @@ -909,57 +909,114 @@ fn test_into_string_where_32_bit_capacity_is_on_heap() { fn test_into_string_small_string_with_excess_capacity() { let mut string = String::with_capacity(128); string.push_str("abcdef"); - let str_addr = string.as_ptr(); let str_len = string.len(); - let str_cap = string.capacity(); let compact = CompactString::from(string); - let new_string = String::from(compact); - let new_str_addr = new_string.as_ptr(); - let new_str_len = new_string.len(); - let new_str_cap = new_string.capacity(); - // If small boxed strings are eagerly compacted, the address and capacity assertions won't hold. - // Compaction is not eager, so these should hold. - assert_eq!(str_addr, new_str_addr); - assert_eq!(str_len, new_str_len); - assert_eq!(str_cap, new_str_cap); + // we should eagerly inline the string + assert!(!compact.is_heap_allocated()); + assert_eq!(compact.len(), str_len); + assert_eq!(compact.capacity(), MAX_SIZE); +} + +#[test] +fn test_from_string_buffer_small_string_with_excess_capacity() { + let mut string = String::with_capacity(128); + string.push_str("abcedfg"); + + let str_ptr = string.as_ptr(); + let str_len = string.len(); + let str_cap = string.capacity(); + + // using from_string_buffer should always re-use the underlying buffer + let mut compact = CompactString::from_string_buffer(string); + assert!(compact.is_heap_allocated()); + + let cpt_ptr = compact.as_ptr(); + let cpt_len = compact.len(); + let cpt_cap = compact.capacity(); + + assert_eq!(str_ptr, cpt_ptr); + assert_eq!(str_len, cpt_len); + assert_eq!(str_cap, cpt_cap); } #[test] fn test_into_string_small_string_with_no_excess_capacity() { let string = String::from("abcdef"); - let str_addr = string.as_ptr(); + let str_len = string.len(); + + let compact = CompactString::from(string); + + // we should eagerly inline the string + assert!(!compact.is_heap_allocated()); + assert_eq!(compact.len(), str_len); + assert_eq!(compact.capacity(), MAX_SIZE); +} + +#[test] +fn test_from_string_buffer_small_string_with_no_excess_capacity() { + let string = String::from("abcdefg"); + + let str_ptr = string.as_ptr(); + let str_len = string.len(); + let str_cap = string.capacity(); + + // using from_string_buffer should always re-use the underlying buffer + let mut compact = CompactString::from_string_buffer(string); + assert!(compact.is_heap_allocated()); + + let cpt_ptr = compact.as_ptr(); + let cpt_len = compact.len(); + let cpt_cap = compact.capacity(); + + assert_eq!(str_ptr, cpt_ptr); + assert_eq!(str_len, cpt_len); + assert_eq!(str_cap, cpt_cap); +} + +#[test] +fn test_roundtrip_from_string_empty_string() { + let string = String::new(); + + let str_ptr = string.as_ptr(); let str_len = string.len(); let str_cap = string.capacity(); let compact = CompactString::from(string); + // we should always inline empty strings + assert!(!compact.is_heap_allocated()); + let new_string = String::from(compact); - let new_str_addr = new_string.as_ptr(); + + let new_str_ptr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); - // If small boxed strings are eagerly compacted, the address assertion won't hold. - // Compaction is not eager, so these should hold. - assert_eq!(str_addr, new_str_addr); + assert_eq!(str_ptr, new_str_ptr); assert_eq!(str_len, new_str_len); assert_eq!(str_cap, new_str_cap); } #[test] -fn test_into_string_empty_string() { +fn test_roundtrip_from_string_buffer_empty_string() { let string = String::new(); - let str_addr = string.as_ptr(); + + let str_ptr = string.as_ptr(); let str_len = string.len(); let str_cap = string.capacity(); - let compact = CompactString::from(string); + let compact = CompactString::from_string_buffer(string); + // we should always inline empty strings + assert!(!compact.is_heap_allocated()); + let new_string = String::from(compact); - let new_str_addr = new_string.as_ptr(); + + let new_str_ptr = new_string.as_ptr(); let new_str_len = new_string.len(); let new_str_cap = new_string.capacity(); - assert_eq!(str_addr, new_str_addr); + assert_eq!(str_ptr, new_str_ptr); assert_eq!(str_len, new_str_len); assert_eq!(str_cap, new_str_cap); } @@ -1460,8 +1517,8 @@ fn test_into_cow() { } #[test] -fn test_from_string_inlines_on_push() { - let mut compact = CompactString::from("hello".to_string()); +fn test_from_string_buffer_inlines_on_push() { + let mut compact = CompactString::from_string_buffer("hello".to_string()); assert!(compact.is_heap_allocated()); compact.push_str(" world"); @@ -1470,8 +1527,8 @@ fn test_from_string_inlines_on_push() { } #[test] -fn test_from_string_inlines_on_clone() { - let a = CompactString::from("hello".to_string()); +fn test_from_string_buffer_inlines_on_clone() { + let a = CompactString::from_string_buffer("hello".to_string()); assert!(a.is_heap_allocated()); let b = a.clone(); From 8abf1580878fb383b06dd9c29af86ec7ca554cf4 Mon Sep 17 00:00:00 2001 From: Parker Timmerman Date: Fri, 17 Feb 2023 10:50:11 -0500 Subject: [PATCH 3/3] several changes - add doc comment to CompactString explaining the behavior of From - change Repr::from_string to eagerly inline based on string length, not capacity, which results in us possibly truncating capacity - delete Repr::from_box_str, and have impl From> for CompactString use Repr::from_string - update tests --- compact_str/src/features/proptest.rs | 8 +- compact_str/src/lib.rs | 101 +++++++++++++++++------ compact_str/src/repr/mod.rs | 119 ++++++--------------------- compact_str/src/tests.rs | 5 +- 4 files changed, 106 insertions(+), 127 deletions(-) diff --git a/compact_str/src/features/proptest.rs b/compact_str/src/features/proptest.rs index e0b5bcf5..d756fd61 100644 --- a/compact_str/src/features/proptest.rs +++ b/compact_str/src/features/proptest.rs @@ -29,6 +29,8 @@ mod test { use crate::CompactString; + const MAX_SIZE: usize = std::mem::size_of::(); + proptest! { #[test] #[cfg_attr(miri, ignore)] @@ -38,12 +40,12 @@ mod test { } /// We rely on [`proptest`]'s `String` strategy for generating a `CompactString`. When - /// converting from a `String` into a `CompactString`, our O(1) converstion kicks in and we - /// reuse the buffer, unless empty, and thus all non-empty strings will be heap allocated + /// converting from a `String` into a `CompactString`, if it's short enough we should + /// eagerly inline strings #[test] #[cfg_attr(miri, ignore)] fn proptest_does_not_inline_strings(compact: CompactString) { - if compact.is_empty() { + if compact.len() <= MAX_SIZE { assert!(!compact.is_heap_allocated()); } else { assert!(compact.is_heap_allocated()); diff --git a/compact_str/src/lib.rs b/compact_str/src/lib.rs index 37a10663..6c941241 100644 --- a/compact_str/src/lib.rs +++ b/compact_str/src/lib.rs @@ -87,6 +87,55 @@ mod tests; /// assert_eq!(CompactString::new("chicago"), "chicago"); /// assert_eq!(CompactString::new("houston"), String::from("houston")); /// ``` +/// +/// # Converting from a `String` +/// It's important that a `CompactString` interops well with `String`, so you can easily use both in +/// your code base. +/// +/// `CompactString` implements `From` and operates in the following manner: +/// - Eagerly inlines the string, possibly dropping excess capacity +/// - Otherwise re-uses the same underlying buffer from `String` +/// +/// ``` +/// use compact_str::CompactString; +/// +/// // eagerly inlining +/// let short = String::from("hello world"); +/// let short_c = CompactString::from(short); +/// assert!(!short_c.is_heap_allocated()); +/// +/// // dropping excess capacity +/// let mut excess = String::with_capacity(256); +/// excess.push_str("abc"); +/// +/// let excess_c = CompactString::from(excess); +/// assert!(!excess_c.is_heap_allocated()); +/// assert!(excess_c.capacity() < 256); +/// +/// // re-using the same buffer +/// let long = String::from("this is a longer string that will be heap allocated"); +/// +/// let long_ptr = long.as_ptr(); +/// let long_len = long.len(); +/// let long_cap = long.capacity(); +/// +/// let mut long_c = CompactString::from(long); +/// assert!(long_c.is_heap_allocated()); +/// +/// let cpt_ptr = long_c.as_ptr(); +/// let cpt_len = long_c.len(); +/// let cpt_cap = long_c.capacity(); +/// +/// // the original String and the CompactString point to the same place in memory, buffer re-use! +/// assert_eq!(cpt_ptr, long_ptr); +/// assert_eq!(cpt_len, long_len); +/// assert_eq!(cpt_cap, long_cap); +/// ``` +/// +/// ### Prevent Eagerly Inlining +/// A consequence of eagerly inlining is you then need to de-allocate the existing buffer, which +/// might not always be desirable if you're converting a very large amount of `String`s. If your +/// code is very sensitive to allocations, consider the [`CompactString::from_string_buffer`] API. #[derive(Clone)] #[repr(transparent)] pub struct CompactString(Repr); @@ -1397,17 +1446,17 @@ impl CompactString { /// ``` /// use compact_str::CompactString; /// - /// let x = "hello world".to_string(); - /// let c_inline = CompactString::from(x); - /// - /// // using From, we create an inlined CompactString - /// assert!(!c_inline.is_heap_allocated()); - /// - /// let y = "hello world".to_string(); - /// let c_heap = CompactString::from_string_buffer(y); + /// let short = "hello world".to_string(); + /// let c_heap = CompactString::from_string_buffer(short); /// /// // using CompactString::from_string_buffer, we'll re-use the String's underlying buffer /// assert!(c_heap.is_heap_allocated()); + /// + /// // note: when Clone-ing a short heap allocated string, we'll eagerly inline at that point + /// let c_inline = c_heap.clone(); + /// assert!(!c_inline.is_heap_allocated()); + /// + /// assert_eq!(c_heap, c_inline); /// ``` /// /// ### Longer Strings @@ -1425,19 +1474,6 @@ impl CompactString { /// assert!(c2.is_heap_allocated()); /// ``` /// - /// ### `Clone`-ing - /// ``` - /// use compact_str::CompactString; - /// - /// let x = "hello world".to_string(); - /// let c_heap = CompactString::from_string_buffer(x); - /// assert!(c_heap.is_heap_allocated()); - /// - /// // when cloning, if a string is heap allocated but short, we'll inline the cloned version - /// let c_inline = c_heap.clone(); - /// assert!(!c_inline.is_heap_allocated()); - /// ``` - /// /// ### Buffer Re-use /// ``` /// use compact_str::CompactString; @@ -1445,13 +1481,23 @@ impl CompactString { /// let og = "hello world".to_string(); /// let og_addr = og.as_ptr(); /// - /// let c = CompactString::from_string_buffer(og); - /// let ex = String::from(c); - /// let ex_addr = ex.as_ptr(); + /// let mut c = CompactString::from_string_buffer(og); + /// let ex_addr = c.as_ptr(); /// - /// // When converting to/from String and CompactString, we re-use the same underlying allocated - /// // memory/buffer + /// // When converting to/from String and CompactString with from_string_buffer we always re-use + /// // the same underlying allocated memory/buffer /// assert_eq!(og_addr, ex_addr); + /// + /// let long = "this is a long string that will be on the heap".to_string(); + /// let long_addr = long.as_ptr(); + /// + /// let mut long_c = CompactString::from(long); + /// let long_ex_addr = long_c.as_ptr(); + /// + /// // When converting to/from String and CompactString with From, we'll also re-use the + /// // underlying buffer, if the string is long, otherwise when converting to CompactString we + /// // eagerly inline + /// assert_eq!(long_addr, long_ex_addr); /// ``` #[inline] pub fn from_string_buffer(s: String) -> Self { @@ -1594,7 +1640,8 @@ impl<'a> From> for CompactString { impl From> for CompactString { fn from(b: Box) -> Self { - let repr = Repr::from_box_str(b, true); + let s = b.into_string(); + let repr = Repr::from_string(s, true); CompactString(repr) } } diff --git a/compact_str/src/repr/mod.rs b/compact_str/src/repr/mod.rs index ef1eccd9..ea962ed4 100644 --- a/compact_str/src/repr/mod.rs +++ b/compact_str/src/repr/mod.rs @@ -166,11 +166,11 @@ impl Repr { // We only hit this case if the provided String is > 16MB and we're on a 32-bit arch. We // expect it to be unlikely, thus we hint that to the compiler capacity_on_heap(s) - } else if s.is_empty() { + } else if og_cap == 0 { // We don't expect converting from an empty String often, so we make this code path cold empty() } else if should_inline && s.len() <= MAX_SIZE { - // SAFETY: Checked to make sure the length of s will fit inline + // SAFETY: Checked to make sure the string would fit inline let inline = unsafe { InlineBuffer::new(s.as_str()) }; // SAFETY: `InlineBuffer` and `Repr` are the same size unsafe { mem::transmute(inline) } @@ -237,52 +237,6 @@ impl Repr { } } - #[inline] - pub fn from_box_str(s: Box, should_inline: bool) -> Self { - let og_cap = s.len(); - let cap = Capacity::new(og_cap); - - #[cold] - fn capacity_on_heap(s: Box) -> Repr { - let heap = HeapBuffer::new(&s); - // SAFETY: `BoxString` and `Repr` are the same size - unsafe { mem::transmute(heap) } - } - - #[cold] - fn empty() -> Repr { - EMPTY - } - - if cap.is_heap() { - // We only hit this case if the provided String is > 16MB and we're on a 32-bit arch. We - // expect it to be unlikely, thus we hint that to the compiler - capacity_on_heap(s) - } else if og_cap == 0 { - // We don't expect converting from an empty String often, so we make this code path cold - empty() - } else if should_inline && og_cap <= MAX_SIZE { - // SAFETY: Checked to make sure the length of s will fit inline - let inline = unsafe { InlineBuffer::new(&s) }; - // SAFETY: `InlineBuffer` and `Repr` are the same size - unsafe { mem::transmute(inline) } - } else { - // Don't drop the box here - let raw_ptr = Box::into_raw(s).cast::(); - let ptr = ptr::NonNull::new(raw_ptr).expect("string with capacity has null ptr?"); - - // create a new BoxString with our parts! - let heap = HeapBuffer { - ptr, - len: og_cap, - cap, - }; - - // SAFETY: `BoxString` and `Repr` are the same size - unsafe { mem::transmute(heap) } - } - } - /// Reserves at least `additional` bytes. If there is already enough capacity to store /// `additional` bytes this is a no-op #[inline] @@ -700,22 +654,35 @@ mod tests { #[test_case(String::new(), true; "empty should inline")] #[test_case(String::new(), false; "empty not inline")] + #[test_case(String::with_capacity(10), true ; "empty with small capacity inline")] + #[test_case(String::with_capacity(10), false ; "empty with small capacity not inline")] + #[test_case(String::with_capacity(128), true ; "empty with large capacity inline")] + #[test_case(String::with_capacity(128), false ; "empty with large capacity not inline")] #[test_case(String::from("nyc 🗽"), true; "short should inline")] #[test_case(String::from("nyc 🗽"), false ; "short not inline")] #[test_case(String::from("this is a really long string, which is intended"), true; "long")] #[test_case(String::from("this is a really long string, which is intended"), false; "long not inline")] + #[test_case(EIGHTEEN_MB_STR.to_string(), true ; "huge should inline")] + #[test_case(EIGHTEEN_MB_STR.to_string(), false ; "huge not inline")] fn test_from_string(s: String, try_to_inline: bool) { - let r = Repr::from_string(s.clone(), try_to_inline); + // note: when cloning a String it truncates capacity, which is why we measure these values + // before cloning the string + let s_len = s.len(); + let s_cap = s.capacity(); + let s_str = s.clone(); - assert_eq!(r.len(), s.len()); - assert_eq!(r.as_str(), s.as_str()); + let r = Repr::from_string(s, try_to_inline); + + assert_eq!(r.len(), s_len); + assert_eq!(r.as_str(), s_str.as_str()); - if s.is_empty() { + if s_cap == 0 { // we should always inline the string, if the length of the source string is 0 assert!(!r.is_heap_allocated()); - } else if s.len() <= MAX_SIZE { - // we should inline the string, if we were asked to - assert_eq!(!r.is_heap_allocated(), try_to_inline); + } else if try_to_inline && s_len <= MAX_SIZE { + // we should inline the string, if we were asked to, and the length of the string would + // fit inline, meaning we would truncate capacity + assert!(!r.is_heap_allocated()); } else { assert!(r.is_heap_allocated()); } @@ -729,10 +696,10 @@ mod tests { assert_eq!(r.len(), s.len()); assert_eq!(r.as_str(), s.as_str()); - if s.is_empty() { + if s.capacity() == 0 { // we should always inline the string, if the length of the source string is 0 assert!(!r.is_heap_allocated()); - } else if s.len() <= MAX_SIZE { + } else if s.capacity() <= MAX_SIZE { // we should inline the string, if we were asked to assert_eq!(!r.is_heap_allocated(), try_to_inline); } else { @@ -868,42 +835,4 @@ mod tests { assert_eq!(r_a.capacity(), r_b.capacity()); assert_eq!(r_a.is_heap_allocated(), r_b.is_heap_allocated()); } - - #[test_case("", true ; "empty should inline")] - #[test_case("", false ; "empty not inline")] - #[test_case("q", true ; "single should inline")] - #[test_case("q", false ; "single not inline")] - #[test_case("abc", true ; "short inline")] - #[test_case("abc", false ; "short not inline")] - #[test_case("this is (another) long string that will be heap allocated", true ; "long should inline")] - #[test_case("this is (another) long string that will be heap allocated", false ; "long not inline")] - #[test_case(EIGHTEEN_MB_STR, true ; "huge should inline")] - #[test_case(EIGHTEEN_MB_STR, false ; "huge not inline")] - fn test_from_box_str(initial: &'static str, try_to_inline: bool) { - let box_str = String::from(initial).into_boxed_str(); - - let r = Repr::from_box_str(box_str, try_to_inline); - - assert_eq!(r.as_str(), initial); - assert_eq!(r.len(), initial.len()); - - // when converting from a Box we optionally inline the string, if it would fit - if initial.is_empty() { - // we should always inline the string, if the length of the source string is 0 - assert!(!r.is_heap_allocated()); - assert_eq!(r.capacity(), MAX_SIZE); - } else if initial.len() <= MAX_SIZE { - // we should inline the string, if we were asked to - assert_eq!(!r.is_heap_allocated(), try_to_inline); - let expected_capacity = if try_to_inline { - MAX_SIZE - } else { - initial.len() - }; - assert_eq!(r.capacity(), expected_capacity); - } else { - assert!(r.is_heap_allocated()); - assert_eq!(r.capacity(), initial.len()); - } - } } diff --git a/compact_str/src/tests.rs b/compact_str/src/tests.rs index 2ac5a462..d7f3d4e8 100644 --- a/compact_str/src/tests.rs +++ b/compact_str/src/tests.rs @@ -912,8 +912,9 @@ fn test_into_string_small_string_with_excess_capacity() { let str_len = string.len(); let compact = CompactString::from(string); - - // we should eagerly inline the string + // we should inline this string, which would truncate capacity + // + // note: String truncates capacity on Clone, so truncating here seems reasonable assert!(!compact.is_heap_allocated()); assert_eq!(compact.len(), str_len); assert_eq!(compact.capacity(), MAX_SIZE);