Skip to content

[proof of concept] give String support for CoW wrapping of literals #46993

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 47 additions & 16 deletions src/liballoc/raw_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ impl<T, A: Alloc> RawVec<T, A> {
///
/// impl<T> MyVec<T> {
/// pub fn push(&mut self, elem: T) {
/// if self.len == self.buf.cap() { self.buf.double(); }
/// if self.len >= self.buf.cap() { self.buf.double(self.len); }
/// // double would have aborted or panicked if the len exceeded
/// // `isize::MAX` so this is safe to do unchecked now.
/// unsafe {
Expand All @@ -285,7 +285,7 @@ impl<T, A: Alloc> RawVec<T, A> {
/// ```
#[inline(never)]
#[cold]
pub fn double(&mut self) {
pub fn double(&mut self, used_cap: usize) {
unsafe {
let elem_size = mem::size_of::<T>();

Expand Down Expand Up @@ -318,12 +318,28 @@ impl<T, A: Alloc> RawVec<T, A> {
}
}
None => {
// skip to 4 because tiny Vec's are dumb; but not if that
// would cause overflow
let new_cap = if elem_size > (!0) / 8 { 1 } else { 4 };
match self.a.alloc_array::<T>(new_cap) {
Ok(ptr) => (new_cap, ptr),
Err(e) => self.a.oom(e),
if used_cap == 0 {
// skip to 4 because tiny Vec's are dumb; but not if that
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it "Vecs", not "Vec's"?

You only need the apostrophe if the "Vec" is possessing something.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a pre-existing comment, but yes

// would cause overflow
let new_cap = if elem_size > (!0) / 8 { 1 } else { 4 };
match self.a.alloc_array::<T>(new_cap) {
Ok(ptr) => (new_cap, ptr),
Err(e) => self.a.oom(e),
}
} else {
// Copy on Write (String), assume the source data is Copy
let new_cap = 2 * used_cap;
let new_size = new_cap * elem_size;
alloc_guard(new_size);

let ptr = match self.a.alloc_array::<T>(new_cap) {
Ok(ptr) => ptr,
Err(e) => self.a.oom(e),
};

ptr::copy_nonoverlapping(self.ptr.as_ptr(), ptr.as_ptr(), used_cap);

(new_cap, ptr)
}
}
};
Expand Down Expand Up @@ -411,8 +427,7 @@ impl<T, A: Alloc> RawVec<T, A> {
// panic.

// Don't actually need any more capacity.
// Wrapping in case they gave a bad `used_cap`.
if self.cap().wrapping_sub(used_cap) >= needed_extra_cap {
if used_cap.checked_add(needed_extra_cap).expect("capacity overflow") <= self.cap() {
return;
}

Expand All @@ -434,6 +449,15 @@ impl<T, A: Alloc> RawVec<T, A> {
Ok(ptr) => Unique::new_unchecked(ptr as *mut T),
Err(e) => self.a.oom(e),
};

// CoW of a literal
if used_cap > self.cap() {
ptr::copy_nonoverlapping(
self.ptr.as_ptr(),
uniq.as_ptr(),
used_cap);
}

self.ptr = uniq;
self.cap = new_cap;
}
Expand Down Expand Up @@ -512,8 +536,7 @@ impl<T, A: Alloc> RawVec<T, A> {
// panic.

// Don't actually need any more capacity.
// Wrapping in case they give a bad `used_cap`
if self.cap().wrapping_sub(used_cap) >= needed_extra_cap {
if used_cap.checked_add(needed_extra_cap).expect("capacity overflow") <= self.cap() {
return;
}

Expand All @@ -536,6 +559,15 @@ impl<T, A: Alloc> RawVec<T, A> {
Ok(ptr) => Unique::new_unchecked(ptr as *mut T),
Err(e) => self.a.oom(e),
};

// CoW of a literal
if used_cap > self.cap() {
ptr::copy_nonoverlapping(
self.ptr.as_ptr(),
uniq.as_ptr(),
used_cap);
}

self.ptr = uniq;
self.cap = new_cap;
}
Expand Down Expand Up @@ -567,12 +599,11 @@ impl<T, A: Alloc> RawVec<T, A> {

// Don't actually need any more capacity. If the current `cap` is 0, we can't
// reallocate in place.
// Wrapping in case they give a bad `used_cap`
let old_layout = match self.current_layout() {
Some(layout) => layout,
None => return false,
};
if self.cap().wrapping_sub(used_cap) >= needed_extra_cap {
if used_cap.checked_add(needed_extra_cap).expect("capacity overflow") <= self.cap() {
return false;
}

Expand Down Expand Up @@ -617,8 +648,8 @@ impl<T, A: Alloc> RawVec<T, A> {
return;
}

// This check is my waterloo; it's the only thing Vec wouldn't have to do.
assert!(self.cap >= amount, "Tried to shrink to a larger capacity");
// If capacity is less than size, assume we're doing some CoW shenanigans (String)
if self.cap < amount { return; }

if amount == 0 {
// We want to create a new zero-length vector within the
Expand Down
76 changes: 72 additions & 4 deletions src/liballoc/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,38 @@ impl String {
String { vec: Vec::new() }
}

#[inline]
#[unstable(feature = "lit_strings", reason = "its TOO lit", issue = "42069")]
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might want to change this to something more informative.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't intended to be landed any time soon, so I opted for a joke :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't intended to be landed any time soon

Do you say this because there is more implementation work to be done, or because it hasn't been decided that this is a good thing? (I'm pretty out of the loop.) If the former, cool. If the latter, has it been discussed somewhere?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a thread on internals about landing this feature as a coercion (rather than a method): https://internals.rust-lang.org/t/pre-rfc-allowing-string-literals-to-be-either-static-str-or-string-similar-to-numeric-literals/5029

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh hey, that's pretty cool! Thanks.

/// Creates a Copy on Write (CoW) String from a literal.
///
/// This defers allocating and copying the literal until it's completely
/// necessary, while still producing a seemingly uniquely owned String.
/// The only strange thing about the String will be that it reports a capacity of 0.
///
/// In the best-case, this will completely eliminate the allocation-and-copy
/// that String::from would perform. In the worst-case, this is just moving
/// that allocation-and-copy to a later part of the program. Although moving
/// work may have subtle consequences for caching and latency.
///
/// CoW strings must reallocate in a few places that other Strings wouldn't:
///
/// * mutable/owned views: `deref_mut`, `as_mut_str`, `into_vec`, etc.
/// * in-place mutations: `make_lowercase_ascii`, `make_uppercase_ascii`
/// * removals: `remove(1)`, `drain(1..3)`, etc.
///
/// Note that truncations (like `pop`, `clear`, or `remove(0)`) don't *require*
/// reallocations, but implementations may still reallocate in these cases
/// because this is easier to implement, or to avoid slowing down the common case.
pub fn literally(lit: &'static str) -> String {
unsafe {
String { vec: Vec::from_raw_parts(
lit.as_ptr() as *mut _,
lit.len(),
0,
)}
}
}

/// Creates a new empty `String` with a particular capacity.
///
/// `String`s have an internal buffer to hold their data. The capacity is
Expand Down Expand Up @@ -745,7 +777,8 @@ impl String {
/// ```
#[inline]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn into_bytes(self) -> Vec<u8> {
pub fn into_bytes(mut self) -> Vec<u8> {
self.make_unique();
self.vec
}

Expand Down Expand Up @@ -783,6 +816,7 @@ impl String {
#[inline]
#[stable(feature = "string_as_str", since = "1.7.0")]
pub fn as_mut_str(&mut self) -> &mut str {
// make_unique handled by deref_mut
self
}

Expand Down Expand Up @@ -1085,6 +1119,10 @@ impl String {

let next = idx + ch.len_utf8();
let len = self.len();

// FIXME: don't use make_unique; create a new buffer and copy only what we need.
self.make_unique();

unsafe {
ptr::copy(self.vec.as_ptr().offset(next as isize),
self.vec.as_mut_ptr().offset(idx as isize),
Expand Down Expand Up @@ -1116,6 +1154,9 @@ impl String {
pub fn retain<F>(&mut self, mut f: F)
where F: FnMut(char) -> bool
{
// FIXME: don't use make_unique; create a new buffer and copy only what we need when CoW!
self.make_unique();

let len = self.len();
let mut del_bytes = 0;
let mut idx = 0;
Expand Down Expand Up @@ -1256,6 +1297,7 @@ impl String {
#[inline]
#[stable(feature = "rust1", since = "1.0.0")]
pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
self.make_unique();
&mut self.vec
}

Expand Down Expand Up @@ -1386,6 +1428,9 @@ impl String {
pub fn drain<R>(&mut self, range: R) -> Drain
where R: RangeArgument<usize>
{
// FIXME: do something a lot smarter than a blind clone here
self.make_unique();

// Memory safety
//
// The String version of Drain does not have the memory safety issues
Expand Down Expand Up @@ -1488,10 +1533,21 @@ impl String {
/// let b = s.into_boxed_str();
/// ```
#[stable(feature = "box_str", since = "1.4.0")]
pub fn into_boxed_str(self) -> Box<str> {
pub fn into_boxed_str(mut self) -> Box<str> {
self.make_unique();

let slice = self.vec.into_boxed_slice();
unsafe { from_boxed_utf8_unchecked(slice) }
}

/// Ensure the string isn't CoW, we're about to mutate its contents!
///
/// FIXME: some places can do something smarter with capacity and copies!
#[inline]
fn make_unique(&mut self) {
if self.capacity() > 0 { return }
*self = String { vec: self.vec.clone() }
}
}

impl FromUtf8Error {
Expand Down Expand Up @@ -1586,7 +1642,18 @@ impl fmt::Display for FromUtf16Error {
#[stable(feature = "rust1", since = "1.0.0")]
impl Clone for String {
fn clone(&self) -> Self {
String { vec: self.vec.clone() }
if self.capacity() == 0 {
unsafe {
// String literal, memcopy is a valid clone
String { vec: Vec::from_raw_parts(
self.as_ptr() as *mut _,
self.len(),
0
)}
}
} else {
String { vec: self.vec.clone() }
}
}

fn clone_from(&mut self, source: &Self) {
Expand Down Expand Up @@ -1920,7 +1987,7 @@ impl ops::IndexMut<ops::RangeFrom<usize>> for String {
impl ops::IndexMut<ops::RangeFull> for String {
#[inline]
fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
self
}
}
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
Expand Down Expand Up @@ -1952,6 +2019,7 @@ impl ops::Deref for String {
impl ops::DerefMut for String {
#[inline]
fn deref_mut(&mut self) -> &mut str {
self.make_unique();
unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/liballoc/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#![feature(allocator_api)]
#![feature(alloc_system)]
#![feature(ascii_ctype)]
#![feature(attr_literals)]
#![feature(box_syntax)]
#![feature(inclusive_range_syntax)]
Expand All @@ -20,6 +21,7 @@
#![feature(drain_filter)]
#![feature(exact_size_is_empty)]
#![feature(iterator_step_by)]
#![feature(lit_strings)]
#![feature(pattern)]
#![feature(placement_in_syntax)]
#![feature(rand)]
Expand Down
Loading