diff --git a/src/liballoc/raw_vec.rs b/src/liballoc/raw_vec.rs index dbf1fb1367dda..fc9dbbe157584 100644 --- a/src/liballoc/raw_vec.rs +++ b/src/liballoc/raw_vec.rs @@ -269,7 +269,7 @@ impl RawVec { /// /// impl MyVec { /// pub fn push(&mut self, elem: T) { - /// if self.len == self.buf.cap() { self.buf.double(); } + /// if self.len >= self.buf.cap() { self.buf.double(self.len); } /// // double would have aborted or panicked if the len exceeded /// // `isize::MAX` so this is safe to do unchecked now. /// unsafe { @@ -285,7 +285,7 @@ impl RawVec { /// ``` #[inline(never)] #[cold] - pub fn double(&mut self) { + pub fn double(&mut self, used_cap: usize) { unsafe { let elem_size = mem::size_of::(); @@ -318,12 +318,28 @@ impl RawVec { } } None => { - // skip to 4 because tiny Vec's are dumb; but not if that - // would cause overflow - let new_cap = if elem_size > (!0) / 8 { 1 } else { 4 }; - match self.a.alloc_array::(new_cap) { - Ok(ptr) => (new_cap, ptr), - Err(e) => self.a.oom(e), + if used_cap == 0 { + // skip to 4 because tiny Vec's are dumb; but not if that + // would cause overflow + let new_cap = if elem_size > (!0) / 8 { 1 } else { 4 }; + match self.a.alloc_array::(new_cap) { + Ok(ptr) => (new_cap, ptr), + Err(e) => self.a.oom(e), + } + } else { + // Copy on Write (String), assume the source data is Copy + let new_cap = 2 * used_cap; + let new_size = new_cap * elem_size; + alloc_guard(new_size); + + let ptr = match self.a.alloc_array::(new_cap) { + Ok(ptr) => ptr, + Err(e) => self.a.oom(e), + }; + + ptr::copy_nonoverlapping(self.ptr.as_ptr(), ptr.as_ptr(), used_cap); + + (new_cap, ptr) } } }; @@ -411,8 +427,7 @@ impl RawVec { // panic. // Don't actually need any more capacity. - // Wrapping in case they gave a bad `used_cap`. 
- if self.cap().wrapping_sub(used_cap) >= needed_extra_cap { + if used_cap.checked_add(needed_extra_cap).expect("capacity overflow") <= self.cap() { return; } @@ -434,6 +449,15 @@ impl RawVec { Ok(ptr) => Unique::new_unchecked(ptr as *mut T), Err(e) => self.a.oom(e), }; + + // CoW of a literal + if used_cap > self.cap() { + ptr::copy_nonoverlapping( + self.ptr.as_ptr(), + uniq.as_ptr(), + used_cap); + } + self.ptr = uniq; self.cap = new_cap; } @@ -512,8 +536,7 @@ impl RawVec { // panic. // Don't actually need any more capacity. - // Wrapping in case they give a bad `used_cap` - if self.cap().wrapping_sub(used_cap) >= needed_extra_cap { + if used_cap.checked_add(needed_extra_cap).expect("capacity overflow") <= self.cap() { return; } @@ -536,6 +559,15 @@ impl RawVec { Ok(ptr) => Unique::new_unchecked(ptr as *mut T), Err(e) => self.a.oom(e), }; + + // CoW of a literal + if used_cap > self.cap() { + ptr::copy_nonoverlapping( + self.ptr.as_ptr(), + uniq.as_ptr(), + used_cap); + } + self.ptr = uniq; self.cap = new_cap; } @@ -567,12 +599,11 @@ impl RawVec { // Don't actually need any more capacity. If the current `cap` is 0, we can't // reallocate in place. - // Wrapping in case they give a bad `used_cap` let old_layout = match self.current_layout() { Some(layout) => layout, None => return false, }; - if self.cap().wrapping_sub(used_cap) >= needed_extra_cap { + if used_cap.checked_add(needed_extra_cap).expect("capacity overflow") <= self.cap() { return false; } @@ -617,8 +648,8 @@ impl RawVec { return; } - // This check is my waterloo; it's the only thing Vec wouldn't have to do. 
- assert!(self.cap >= amount, "Tried to shrink to a larger capacity"); + // If capacity is less than size, assume we're doing some CoW shenanigans (String) + if self.cap < amount { return; } if amount == 0 { // We want to create a new zero-length vector within the diff --git a/src/liballoc/string.rs b/src/liballoc/string.rs index ca493ab27e3ad..ac3481d189a8b 100644 --- a/src/liballoc/string.rs +++ b/src/liballoc/string.rs @@ -384,6 +384,38 @@ impl String { String { vec: Vec::new() } } + #[inline] + #[unstable(feature = "lit_strings", reason = "its TOO lit", issue = "42069")] + /// Creates a Copy on Write (CoW) String from a literal. + /// + /// This defers allocating and copying the literal until it's completely + /// necessary, while still producing a seemingly uniquely owned String. + /// The only strange thing about the String will be that it reports a capacity of 0. + /// + /// In the best case, this will completely eliminate the allocation-and-copy + /// that String::from would perform. In the worst case, this is just moving + /// that allocation-and-copy to a later part of the program, although moving + /// work may have subtle consequences for caching and latency. + /// + /// CoW strings must reallocate in a few places that other Strings wouldn't: + /// + /// * mutable/owned views: `deref_mut`, `as_mut_str`, `into_bytes`, etc. + /// * in-place mutations: `make_ascii_lowercase`, `make_ascii_uppercase` + /// * removals: `remove(1)`, `drain(1..3)`, etc. + /// + /// Note that truncations (like `pop`, `clear`, or `remove(0)`) don't *require* + /// reallocations, but implementations may still reallocate in these cases + /// because this is easier to implement, or to avoid slowing down the common case. + pub fn literally(lit: &'static str) -> String { + unsafe { + String { vec: Vec::from_raw_parts( + lit.as_ptr() as *mut _, + lit.len(), + 0, + )} + } + } + /// Creates a new empty `String` with a particular capacity. 
/// /// `String`s have an internal buffer to hold their data. The capacity is @@ -745,7 +777,8 @@ impl String { /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] - pub fn into_bytes(self) -> Vec { + pub fn into_bytes(mut self) -> Vec { + self.make_unique(); self.vec } @@ -783,6 +816,7 @@ impl String { #[inline] #[stable(feature = "string_as_str", since = "1.7.0")] pub fn as_mut_str(&mut self) -> &mut str { + // make_unique handled by deref_mut self } @@ -1085,6 +1119,10 @@ impl String { let next = idx + ch.len_utf8(); let len = self.len(); + + // FIXME: don't use make_unique; create a new buffer and copy only what we need. + self.make_unique(); + unsafe { ptr::copy(self.vec.as_ptr().offset(next as isize), self.vec.as_mut_ptr().offset(idx as isize), @@ -1116,6 +1154,9 @@ impl String { pub fn retain(&mut self, mut f: F) where F: FnMut(char) -> bool { + // FIXME: don't use make_unique; create a new buffer and copy only what we need when CoW! + self.make_unique(); + let len = self.len(); let mut del_bytes = 0; let mut idx = 0; @@ -1256,6 +1297,7 @@ impl String { #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub unsafe fn as_mut_vec(&mut self) -> &mut Vec { + self.make_unique(); &mut self.vec } @@ -1386,6 +1428,9 @@ impl String { pub fn drain(&mut self, range: R) -> Drain where R: RangeArgument { + // FIXME: do something a lot smarter than a blind clone here + self.make_unique(); + // Memory safety // // The String version of Drain does not have the memory safety issues @@ -1488,10 +1533,21 @@ impl String { /// let b = s.into_boxed_str(); /// ``` #[stable(feature = "box_str", since = "1.4.0")] - pub fn into_boxed_str(self) -> Box { + pub fn into_boxed_str(mut self) -> Box { + self.make_unique(); + let slice = self.vec.into_boxed_slice(); unsafe { from_boxed_utf8_unchecked(slice) } } + + /// Ensure the string isn't CoW, we're about to mutate its contents! + /// + /// FIXME: some places can do something smarter with capacity and copies! 
+ #[inline] + fn make_unique(&mut self) { + if self.capacity() > 0 { return } + *self = String { vec: self.vec.clone() } + } } impl FromUtf8Error { @@ -1586,7 +1642,18 @@ impl fmt::Display for FromUtf16Error { #[stable(feature = "rust1", since = "1.0.0")] impl Clone for String { fn clone(&self) -> Self { - String { vec: self.vec.clone() } + if self.capacity() == 0 { + unsafe { + // String literal, memcopy is a valid clone + String { vec: Vec::from_raw_parts( + self.as_ptr() as *mut _, + self.len(), + 0 + )} + } + } else { + String { vec: self.vec.clone() } + } } fn clone_from(&mut self, source: &Self) { @@ -1920,7 +1987,7 @@ impl ops::IndexMut> for String { impl ops::IndexMut for String { #[inline] fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str { - unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) } + self } } #[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")] @@ -1952,6 +2019,7 @@ impl ops::Deref for String { impl ops::DerefMut for String { #[inline] fn deref_mut(&mut self) -> &mut str { + self.make_unique(); unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) } } } diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs index f1e95883b3827..611240880701a 100644 --- a/src/liballoc/tests/lib.rs +++ b/src/liballoc/tests/lib.rs @@ -12,6 +12,7 @@ #![feature(allocator_api)] #![feature(alloc_system)] +#![feature(ascii_ctype)] #![feature(attr_literals)] #![feature(box_syntax)] #![feature(inclusive_range_syntax)] @@ -20,6 +21,7 @@ #![feature(drain_filter)] #![feature(exact_size_is_empty)] #![feature(iterator_step_by)] +#![feature(lit_strings)] #![feature(pattern)] #![feature(placement_in_syntax)] #![feature(rand)] diff --git a/src/liballoc/tests/string.rs b/src/liballoc/tests/string.rs index ef6f5e10a72de..a073a3f30c12d 100644 --- a/src/liballoc/tests/string.rs +++ b/src/liballoc/tests/string.rs @@ -504,3 +504,227 @@ fn test_into_boxed_str() { let ys = xs.into_boxed_str(); assert_eq!(&*ys, "hello 
my name is bob"); } + +// Integration test for CoW strings +fn cowlympics(lit: &'static str) { + // Things that don't CoW, and basic sanity checks + non_cowlympic_events(lit); + + + // Various appends + cowlympic_event(lit, true, |string| { + string.push_str("!"); + }); + + cowlympic_event(lit, true, |string| { + string.push_str("šŸ”„"); + }); + + cowlympic_event(lit, true, |string| { + string.push_str("!!!!"); + }); + + cowlympic_event(lit, true, |string| { + string.push_str("šŸ”„šŸ®šŸ”„"); + }); + + cowlympic_event(lit, true, |string| { + string.push('!'); + }); + + cowlympic_event(lit, true, |string| { + string.push('šŸ”„'); + }); + + cowlympic_event(lit, true, |string| { + *string += "šŸ”„šŸ®šŸ”„"; + }); + + // Could be made to not allocate, but for now it does, so test it! + if lit.len() > 0 { + cowlympic_event(lit, false, |string| { + string.push_str(""); + }); + } + + // Removals + if lit.len() > 0 { + cowlympic_event(lit, true, |string| { + string.remove(0); + }); + } + + if lit.chars().count() > 1 { + cowlympic_event(lit, true, |string| { + let (idx, _) = { + let mut it = string.char_indices(); + it.next().unwrap(); + it.next().unwrap() + }; + string.remove(idx); + }); + } + + // Could be made non-cow, but for now check it! 
+ cowlympic_event(lit, !lit.is_empty(), |string| { + string.drain(..); + }); + + cowlympic_event(lit, !lit.is_empty(), |string| { + string.retain(|_| false); + }); + + // In-place mutations + if lit.is_ascii() && !lit.is_empty() { + let has_upper_case = lit.chars().any(|char| char.is_ascii_uppercase()); + let has_lower_case = lit.chars().any(|char| char.is_ascii_lowercase()); + cowlympic_event(lit, has_upper_case, |string| { + string.make_ascii_lowercase() + }); + + cowlympic_event(lit, has_lower_case, |string| { + string.make_ascii_uppercase() + }); + } + + // mutable views + cowlympic_event(lit, false, |string| { + let _temp = &mut**string; + }); + + cowlympic_event(lit, false, |string| { + let _temp = string.as_mut_str(); + }); + + cowlympic_event(lit, false, |string| { + let _temp = unsafe { string.as_mut_vec() }; + }); +} + +fn non_cowlympic_events(lit: &'static str) { + let lit_ptr = lit.as_ptr(); + let lit_len = lit.len(); + + // We're only going to truncate this (no CoW) + let mut string1 = String::literally(lit); + assert_eq!(string1.capacity(), 0); + assert_eq!(string1.len(), lit_len); + assert_eq!(string1.as_ptr(), lit_ptr); + assert_eq!(string1, String::from(lit)); + + + // Check that pop doesn't trigger CoW, doesn't corrupt. + // Copy for comparison (also trivial copy) + { + let string2 = string1.clone(); + assert_eq!(string2.capacity(), 0); + assert_eq!(string2.len(), lit_len); + assert_eq!(string2.as_ptr(), lit_ptr); + assert_eq!(string2, String::from(lit)); + + let old_len = string1.len(); + assert_eq!(string1.pop(), lit.chars().next_back()); + if old_len > 0 { + assert_ne!(string1, lit); + assert_ne!(string1, string2); + assert!(string1 < string2); + assert!(lit.contains(&string1)); + assert!(string1.len() < old_len); + } + assert_eq!(string1.capacity(), 0); + assert_eq!(string1.as_ptr(), lit_ptr); + } + + // Check that truncate doesn't trigger CoW, doesn't corrupt. 
+ // Copy for comparison (trivial copy) + { + let string1_popped = string1.clone(); + assert_eq!(string1_popped.capacity(), 0); + assert_eq!(string1_popped.len(), string1.len()); + assert_eq!(string1_popped.as_ptr(), lit_ptr); + assert_eq!(string1_popped, string1); + + let old_len = string1.len(); + let offset = string1.char_indices() + .nth(string1.chars().count()/2) + .map(|(idx, _)| idx) + .unwrap_or(0); + string1.truncate(offset); + if old_len > 0 { + assert_ne!(string1, lit); + assert!(string1 < string1_popped); + assert!(string1_popped.contains(&string1)); + assert!(string1.len() < old_len); + } + assert_eq!(string1.capacity(), 0); + assert_eq!(string1.as_ptr(), lit_ptr); + } + + + // Check that clear doesn't trigger CoW, doesn't corrupt. + // Copy for comparison (also trivial copy) + { + let string1_truncated = string1.clone(); + assert_eq!(string1_truncated.capacity(), 0); + assert_eq!(string1_truncated.len(), string1.len()); + assert_eq!(string1_truncated.as_ptr(), lit_ptr); + assert_eq!(string1_truncated, string1); + + let old_len = string1.len(); + string1.clear(); + if old_len > 0 { + assert_ne!(string1, lit); + assert!(string1 < string1_truncated); + assert!(string1_truncated.contains(&string1)); + assert!(string1.len() < old_len); + } + assert_eq!(string1.capacity(), 0); + assert_eq!(string1.len(), 0); + assert_eq!(string1, String::new()); + assert_eq!(string1.as_ptr(), lit_ptr); + } +} + +// Runs `event` on a CoW and an owned String; it must change the value only when `net_mutates` +fn cowlympic_event(lit: &'static str, net_mutates: bool, mut event: F) + where F: FnMut(&mut String) -> R +{ + let mut cow = String::literally(lit); + let mut owned = String::from(lit); + + let lit_ptr = lit.as_ptr(); + let lit_len = lit.len(); + + event(&mut cow); + event(&mut owned); + + if net_mutates { + assert_ne!(owned, lit); + assert_ne!(cow, lit); + } else { + assert_eq!(cow, lit); + assert_eq!(owned, lit); + assert_eq!(cow.len(), lit_len); + } + + assert_eq!(cow, owned); + assert_eq!(cow.len(), owned.len()); + 
assert_ne!(cow.as_ptr(), lit_ptr); + + if owned.capacity() != 0 { + assert_ne!(cow.capacity(), 0); + } +} + + +#[test] +fn test_cow() { + cowlympics("it's lit šŸ”„šŸ®šŸ”„"); + cowlympics("OwO what's this?"); + cowlympics("!"); + cowlympics("!?"); + cowlympics("šŸ”„šŸ®"); + cowlympics("🐮"); + cowlympics("ā“’aā“’eā“’uā“’"); + cowlympics(""); +} diff --git a/src/liballoc/vec.rs b/src/liballoc/vec.rs index 93d7e66b7b203..6764fb3f7a77f 100644 --- a/src/liballoc/vec.rs +++ b/src/liballoc/vec.rs @@ -732,8 +732,8 @@ impl Vec { assert!(index <= len); // space for the new element - if len == self.buf.cap() { - self.buf.double(); + if len >= self.buf.cap() { + self.buf.double(len); } unsafe { @@ -967,8 +967,8 @@ impl Vec { pub fn push(&mut self, value: T) { // This will panic or abort if we would allocate > isize::MAX bytes // or if the length increment would overflow for zero-sized types. - if self.len == self.buf.cap() { - self.buf.double(); + if self.len >= self.buf.cap() { + self.buf.double(self.len); } unsafe { let end = self.as_mut_ptr().offset(self.len as isize); @@ -1920,7 +1920,7 @@ impl Vec { // } while let Some(element) = iterator.next() { let len = self.len(); - if len == self.capacity() { + if len >= self.capacity() { let (lower, _) = iterator.size_hint(); self.reserve(lower.saturating_add(1)); } @@ -2534,8 +2534,8 @@ impl<'a, T> Placer for PlaceBack<'a, T> { fn make_place(self) -> Self { // This will panic or abort if we would allocate > isize::MAX bytes // or if the length increment would overflow for zero-sized types. 
- if self.vec.len == self.vec.buf.cap() { - self.vec.buf.double(); + if self.vec.len >= self.vec.buf.cap() { + self.vec.buf.double(self.vec.len); } self } diff --git a/src/liballoc/vec_deque.rs b/src/liballoc/vec_deque.rs index f56aa23a4eb2f..905a930d1a84a 100644 --- a/src/liballoc/vec_deque.rs +++ b/src/liballoc/vec_deque.rs @@ -1754,7 +1754,7 @@ impl VecDeque { fn grow_if_necessary(&mut self) { if self.is_full() { let old_cap = self.cap(); - self.buf.double(); + self.buf.double(old_cap); unsafe { self.handle_cap_increase(old_cap); }