From 51517e5135f12d4b5045b83edde65a660eabbef9 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 28 Dec 2015 18:16:58 +0000 Subject: [PATCH 1/6] String/Vec::replace_range(RangeArgument, IntoIterator) --- text/0000-replace-slice.md | 209 +++++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 text/0000-replace-slice.md diff --git a/text/0000-replace-slice.md b/text/0000-replace-slice.md new file mode 100644 index 00000000000..cf3a2382108 --- /dev/null +++ b/text/0000-replace-slice.md @@ -0,0 +1,209 @@ +- Feature Name: replace-slice +- Start Date: 2015-12-28 +- RFC PR: +- Rust Issue: + +# Summary +[summary]: #summary + +Add a `replace_slice` method to `Vec` and `String` removes a range of elements, +and replaces it in place with a given sequence of values. +The new sequence does not necessarily have the same length as the range it replaces. + +# Motivation +[motivation]: #motivation + +An implementation of this operation is either slow or dangerous. + +The slow way uses `Vec::drain`, and then `Vec::insert` repeatedly. +The latter part takes quadratic time: +potentially many elements after the replaced range are moved by one offset +potentially many times, once for each new element. + +The dangerous way, detailed below, takes linear time +but involves unsafely moving generic values with `std::ptr::copy`. +This is non-trivial `unsafe` code, where a bug could lead to double-dropping elements +or exposing uninitialized elements. +(Or for `String`, breaking the UTF-8 invariant.) +It therefore benefits form having a shared, carefully-reviewed implementation +rather than leaving it to every potential user to do it themselves. + +While it could be an external crate on crates.io, +this operation is general-purpose enough that I think it belongs in the standard library, +similar to `Vec::drain`. + +# Detailed design +[design]: #detailed-design + +An example implementation is below. + +The proposal is to have inherent methods instead of extension traits. +(Traits are used to make this testable outside of `std` +and to make a point in Unresolved Questions below.) + +```rust +#![feature(collections, collections_range, str_char)] + +extern crate collections; + +use collections::range::RangeArgument; +use std::ptr; + +trait ReplaceVecSlice { + fn replace_slice(&mut self, range: R, iterable: I) + where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator; +} + +impl ReplaceVecSlice for Vec { + fn replace_slice(&mut self, range: R, iterable: I) + where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator + { + let len = self.len(); + let range_start = *range.start().unwrap_or(&0); + let range_end = *range.end().unwrap_or(&len); + assert!(range_start <= range_end); + assert!(range_end <= len); + let mut iter = iterable.into_iter(); + // Overwrite range + for i in range_start..range_end { + if let Some(new_element) = iter.next() { + unsafe { + *self.get_unchecked_mut(i) = new_element + } + } else { + // Iterator shorter than range + self.drain(i..range_end); + return + } + } + // Insert rest + let iter_len = iter.len(); + let elements_after = len - range_end; + let free_space_start = range_end; + let free_space_end = free_space_start + iter_len; + + // FIXME: merge the reallocating case with the first ptr::copy below? + self.reserve(iter_len); + + let p = self.as_mut_ptr(); + unsafe { + // In case iter.next() panics, leak some elements rather than risk double-freeing them. + self.set_len(free_space_start); + // Shift everything over to make space (duplicating some elements). + ptr::copy(p.offset(free_space_start as isize), + p.offset(free_space_end as isize), + elements_after); + for i in free_space_start..free_space_end { + if let Some(new_element) = iter.next() { + *self.get_unchecked_mut(i) = new_element + } else { + // Iterator shorter than its ExactSizeIterator::len() + ptr::copy(p.offset(free_space_end as isize), + p.offset(i as isize), + elements_after); + self.set_len(i + elements_after); + return + } + } + self.set_len(free_space_end + elements_after); + } + // Iterator longer than its ExactSizeIterator::len(), degenerate to quadratic time + for (new_element, i) in iter.zip(free_space_end..) { + self.insert(i, new_element); + } + } +} + +trait ReplaceStringSlice { + fn replace_slice(&mut self, range: R, s: &str) where R: RangeArgument; +} + +impl ReplaceStringSlice for String { + fn replace_slice(&mut self, range: R, s: &str) where R: RangeArgument { + if let Some(&start) = range.start() { + assert!(self.is_char_boundary(start)); + } + if let Some(&end) = range.end() { + assert!(self.is_char_boundary(end)); + } + unsafe { + self.as_mut_vec() + }.replace_slice(range, s.bytes()) + } +} + +#[test] +fn it_works() { + let mut v = vec![1, 2, 3, 4, 5]; + v.replace_slice(2..4, [10, 11, 12].iter().cloned()); + assert_eq!(v, &[1, 2, 10, 11, 12, 5]); + v.replace_slice(1..3, Some(20)); + assert_eq!(v, &[1, 20, 11, 12, 5]); + let mut s = "Hello, world!".to_owned(); + s.replace_slice(7.., "世界!"); + assert_eq!(s, "Hello, 世界!"); +} + +#[test] +#[should_panic] +fn char_boundary() { + let mut s = "Hello, 世界!".to_owned(); + s.replace_slice(..8, "") +} +``` + +This implementation defends against `ExactSizeIterator::len()` being incorrect. +If `len()` is too high, it reserves more capacity than necessary +and does more copying than necessary, +but stays in linear time. +If `len()` is too low, the algorithm degenerates to quadratic time +using `Vec::insert` for each additional new element. + +# Drawbacks +[drawbacks]: #drawbacks + +Same as for any addition to `std`: +not every program needs it, and standard library growth has a maintainance cost. + +# Alternatives +[alternatives]: #alternatives + +* Status quo: leave it to every one who wants this to do it the slow way or the dangerous way. +* Publish a crate on crates.io. + Individual crates tend to be not very discoverable, + so not this situation would not be so different from the status quo. + +# Unresolved questions +[unresolved]: #unresolved-questions + +* Should the `ExactSizeIterator` bound be removed? + The lower bound of `Iterator::size_hint` could be used instead of `ExactSizeIterator::len`, + but the degenerate quadratic time case would become “normal”. + With `ExactSizeIterator` it only happens when `ExactSizeIterator::len` is incorrect + which means that someone is doing something wrong. + +* Alternatively, should `replace_slice` panic when `ExactSizeIterator::len` is incorrect? + +* It would be nice to be able to `Vec::replace_slice` with a slice + without writing `.iter().cloned()` explicitly. + This is possible with the same trick as for the `Extend` trait + ([RFC 839](https://github.com/rust-lang/rfcs/blob/master/text/0839-embrace-extend-extinguish.md)): + accept iterators of `&T` as well as iterators of `T`: + + ```rust + impl<'a, T: 'a> ReplaceVecSlice<&'a T> for Vec where T: Copy { + fn replace_slice(&mut self, range: R, iterable: I) + where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator + { + self.replace_slice(range, iterable.into_iter().cloned()) + } + } + ``` + + However, this trick can not be used with an inherent method instead of a trait. + (By the way, what was the motivation for `Extend` being a trait rather than inherent methods, + before RFC 839?) + +* Naming. + I accidentally typed `replace_range` instead of `replace_slice` several times + while typing up this RFC. From 22d0deaf7c24710cb0e72c4a5d24c21547586bbc Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 29 Dec 2015 07:25:09 +0000 Subject: [PATCH 2/6] replace_slice: incorporate some feedback --- text/0000-replace-slice.md | 53 ++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/text/0000-replace-slice.md b/text/0000-replace-slice.md index cf3a2382108..3ee9d2c966e 100644 --- a/text/0000-replace-slice.md +++ b/text/0000-replace-slice.md @@ -82,30 +82,32 @@ impl ReplaceVecSlice for Vec { let free_space_start = range_end; let free_space_end = free_space_start + iter_len; - // FIXME: merge the reallocating case with the first ptr::copy below? - self.reserve(iter_len); - - let p = self.as_mut_ptr(); - unsafe { - // In case iter.next() panics, leak some elements rather than risk double-freeing them. - self.set_len(free_space_start); - // Shift everything over to make space (duplicating some elements). - ptr::copy(p.offset(free_space_start as isize), - p.offset(free_space_end as isize), - elements_after); - for i in free_space_start..free_space_end { - if let Some(new_element) = iter.next() { - *self.get_unchecked_mut(i) = new_element - } else { - // Iterator shorter than its ExactSizeIterator::len() - ptr::copy(p.offset(free_space_end as isize), - p.offset(i as isize), - elements_after); - self.set_len(i + elements_after); - return + if iter_len > 0 { + // FIXME: merge the reallocating case with the first ptr::copy below? + self.reserve(iter_len); + + let p = self.as_mut_ptr(); + unsafe { + // In case iter.next() panics, leak some elements rather than risk double-freeing them. + self.set_len(free_space_start); + // Shift everything over to make space (duplicating some elements). + ptr::copy(p.offset(free_space_start as isize), + p.offset(free_space_end as isize), + elements_after); + for i in free_space_start..free_space_end { + if let Some(new_element) = iter.next() { + *self.get_unchecked_mut(i) = new_element + } else { + // Iterator shorter than its ExactSizeIterator::len() + ptr::copy(p.offset(free_space_end as isize), + p.offset(i as isize), + elements_after); + self.set_len(i + elements_after); + return + } } + self.set_len(free_space_end + elements_after); } - self.set_len(free_space_end + elements_after); } // Iterator longer than its ExactSizeIterator::len(), degenerate to quadratic time for (new_element, i) in iter.zip(free_space_end..) { @@ -207,3 +209,10 @@ not every program needs it, and standard library growth has a maintainance cost. * Naming. I accidentally typed `replace_range` instead of `replace_slice` several times while typing up this RFC. + Update: I’m told `splice` is how this operation is called. + +* The method could return an iterator of the replaced elements. + Nothing would happen when the method is called, + only when the returned iterator is advanced or dropped. + There’s is precedent of this in `Vec::drain`, + though the input iterator being lazily consumed could be surprising. From 73caab53a7e58f2042f471992c785efade94f214 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 29 Dec 2015 10:59:10 +0000 Subject: [PATCH 3/6] replace_slice -> insert? --- text/0000-replace-slice.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/text/0000-replace-slice.md b/text/0000-replace-slice.md index 3ee9d2c966e..ab9046f77f5 100644 --- a/text/0000-replace-slice.md +++ b/text/0000-replace-slice.md @@ -216,3 +216,7 @@ not every program needs it, and standard library growth has a maintainance cost. only when the returned iterator is advanced or dropped. There’s is precedent of this in `Vec::drain`, though the input iterator being lazily consumed could be surprising. + +* If coherence rules and backward-compatibility allow it, + this functionality could be added to `Vec::insert` and `String::insert` + by overloading them / making them more generic. From 5a44898eb98344b8d81b3fd6381589199ba9a27e Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 29 Dec 2015 11:08:01 +0000 Subject: [PATCH 4/6] impl RangeArgument for usize? --- text/0000-replace-slice.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/text/0000-replace-slice.md b/text/0000-replace-slice.md index ab9046f77f5..c6437981df1 100644 --- a/text/0000-replace-slice.md +++ b/text/0000-replace-slice.md @@ -220,3 +220,7 @@ not every program needs it, and standard library growth has a maintainance cost. * If coherence rules and backward-compatibility allow it, this functionality could be added to `Vec::insert` and `String::insert` by overloading them / making them more generic. + This would probably require implementing `RangeArgument` for `usize` + representing an empty range, + though a range of length 1 would maybe make more sense for `Vec::drain` + (another user of `RangeArgument`). From 087bb92ea72bacabcce8fa636680198d4ad1bbe2 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 12 Feb 2016 18:31:29 +0100 Subject: [PATCH 5/6] replace_range: rename to splice. --- text/0000-replace-slice.md | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/text/0000-replace-slice.md b/text/0000-replace-slice.md index c6437981df1..fa2834148f5 100644 --- a/text/0000-replace-slice.md +++ b/text/0000-replace-slice.md @@ -1,4 +1,4 @@ -- Feature Name: replace-slice +- Feature Name: splice - Start Date: 2015-12-28 - RFC PR: - Rust Issue: @@ -6,7 +6,7 @@ # Summary [summary]: #summary -Add a `replace_slice` method to `Vec` and `String` removes a range of elements, +Add a `splice` method to `Vec` and `String` removes a range of elements, and replaces it in place with a given sequence of values. The new sequence does not necessarily have the same length as the range it replaces. @@ -50,12 +50,12 @@ use collections::range::RangeArgument; use std::ptr; trait ReplaceVecSlice { - fn replace_slice(&mut self, range: R, iterable: I) + fn splice(&mut self, range: R, iterable: I) where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator; } impl ReplaceVecSlice for Vec { - fn replace_slice(&mut self, range: R, iterable: I) + fn splice(&mut self, range: R, iterable: I) where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator { let len = self.len(); @@ -117,11 +117,11 @@ impl ReplaceVecSlice for Vec { } trait ReplaceStringSlice { - fn replace_slice(&mut self, range: R, s: &str) where R: RangeArgument; + fn splice(&mut self, range: R, s: &str) where R: RangeArgument; } impl ReplaceStringSlice for String { - fn replace_slice(&mut self, range: R, s: &str) where R: RangeArgument { + fn splice(&mut self, range: R, s: &str) where R: RangeArgument { if let Some(&start) = range.start() { assert!(self.is_char_boundary(start)); } @@ -130,19 +130,19 @@ impl ReplaceStringSlice for String { } unsafe { self.as_mut_vec() - }.replace_slice(range, s.bytes()) + }.splice(range, s.bytes()) } } #[test] fn it_works() { let mut v = vec![1, 2, 3, 4, 5]; - v.replace_slice(2..4, [10, 11, 12].iter().cloned()); + v.splice(2..4, [10, 11, 12].iter().cloned()); assert_eq!(v, &[1, 2, 10, 11, 12, 5]); - v.replace_slice(1..3, Some(20)); + v.splice(1..3, Some(20)); assert_eq!(v, &[1, 20, 11, 12, 5]); let mut s = "Hello, world!".to_owned(); - s.replace_slice(7.., "世界!"); + s.splice(7.., "世界!"); assert_eq!(s, "Hello, 世界!"); } @@ -150,7 +150,7 @@ fn it_works() { #[should_panic] fn char_boundary() { let mut s = "Hello, 世界!".to_owned(); - s.replace_slice(..8, "") + s.splice(..8, "") } ``` @@ -184,9 +184,9 @@ not every program needs it, and standard library growth has a maintainance cost. With `ExactSizeIterator` it only happens when `ExactSizeIterator::len` is incorrect which means that someone is doing something wrong. -* Alternatively, should `replace_slice` panic when `ExactSizeIterator::len` is incorrect? +* Alternatively, should `splice` panic when `ExactSizeIterator::len` is incorrect? -* It would be nice to be able to `Vec::replace_slice` with a slice +* It would be nice to be able to `Vec::splice` with a slice without writing `.iter().cloned()` explicitly. This is possible with the same trick as for the `Extend` trait ([RFC 839](https://github.com/rust-lang/rfcs/blob/master/text/0839-embrace-extend-extinguish.md)): @@ -194,10 +194,10 @@ not every program needs it, and standard library growth has a maintainance cost. ```rust impl<'a, T: 'a> ReplaceVecSlice<&'a T> for Vec where T: Copy { - fn replace_slice(&mut self, range: R, iterable: I) + fn splice(&mut self, range: R, iterable: I) where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator { - self.replace_slice(range, iterable.into_iter().cloned()) + self.splice(range, iterable.into_iter().cloned()) } } ``` @@ -206,11 +206,6 @@ not every program needs it, and standard library growth has a maintainance cost. (By the way, what was the motivation for `Extend` being a trait rather than inherent methods, before RFC 839?) -* Naming. - I accidentally typed `replace_range` instead of `replace_slice` several times - while typing up this RFC. - Update: I’m told `splice` is how this operation is called. - * The method could return an iterator of the replaced elements. Nothing would happen when the method is called, only when the returned iterator is advanced or dropped. From ae0b0cd0de07af9d13f5c17ba0d15d0067f67f99 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 12 Feb 2016 18:53:01 +0100 Subject: [PATCH 6/6] splice: Return an iterator --- text/0000-replace-slice.md | 127 +++++++++++++------------------------ 1 file changed, 43 insertions(+), 84 deletions(-) diff --git a/text/0000-replace-slice.md b/text/0000-replace-slice.md index fa2834148f5..7e4d0938cfa 100644 --- a/text/0000-replace-slice.md +++ b/text/0000-replace-slice.md @@ -9,6 +9,8 @@ Add a `splice` method to `Vec` and `String` removes a range of elements, and replaces it in place with a given sequence of values. The new sequence does not necessarily have the same length as the range it replaces. +In the `Vec` case, this method returns an iterator of the elements being moved out, like `drain`. + # Motivation [motivation]: #motivation @@ -49,78 +51,44 @@ extern crate collections; use collections::range::RangeArgument; use std::ptr; -trait ReplaceVecSlice { - fn splice(&mut self, range: R, iterable: I) - where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator; +trait VecSplice { + fn splice(&mut self, range: R, iterable: I) -> Splice + where R: RangeArgument, I: IntoIterator; } -impl ReplaceVecSlice for Vec { - fn splice(&mut self, range: R, iterable: I) - where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator +impl VecSplice for Vec { + fn splice(&mut self, range: R, iterable: I) -> Splice + where R: RangeArgument, I: IntoIterator { - let len = self.len(); - let range_start = *range.start().unwrap_or(&0); - let range_end = *range.end().unwrap_or(&len); - assert!(range_start <= range_end); - assert!(range_end <= len); - let mut iter = iterable.into_iter(); - // Overwrite range - for i in range_start..range_end { - if let Some(new_element) = iter.next() { - unsafe { - *self.get_unchecked_mut(i) = new_element - } - } else { - // Iterator shorter than range - self.drain(i..range_end); - return - } - } - // Insert rest - let iter_len = iter.len(); - let elements_after = len - range_end; - let free_space_start = range_end; - let free_space_end = free_space_start + iter_len; - - if iter_len > 0 { - // FIXME: merge the reallocating case with the first ptr::copy below? - self.reserve(iter_len); - - let p = self.as_mut_ptr(); - unsafe { - // In case iter.next() panics, leak some elements rather than risk double-freeing them. - self.set_len(free_space_start); - // Shift everything over to make space (duplicating some elements). - ptr::copy(p.offset(free_space_start as isize), - p.offset(free_space_end as isize), - elements_after); - for i in free_space_start..free_space_end { - if let Some(new_element) = iter.next() { - *self.get_unchecked_mut(i) = new_element - } else { - // Iterator shorter than its ExactSizeIterator::len() - ptr::copy(p.offset(free_space_end as isize), - p.offset(i as isize), - elements_after); - self.set_len(i + elements_after); - return - } - } - self.set_len(free_space_end + elements_after); - } - } - // Iterator longer than its ExactSizeIterator::len(), degenerate to quadratic time - for (new_element, i) in iter.zip(free_space_end..) { - self.insert(i, new_element); - } + unimplemented!() // FIXME: Fill in when exact semantics are decided. } } -trait ReplaceStringSlice { +struct Splice { + vec: &mut Vec, + range: Range + iter: I::IntoIter, + // FIXME: Fill in when exact semantics are decided. +} + +impl Iterator for Splice { + type Item = I::Item; + fn next(&mut self) -> Option { + unimplemented!() // FIXME: Fill in when exact semantics are decided. + } +} + +impl Drop for Splice { + fn drop(&mut self) { + unimplemented!() // FIXME: Fill in when exact semantics are decided. + } +} + +trait StringSplice { fn splice(&mut self, range: R, s: &str) where R: RangeArgument; } -impl ReplaceStringSlice for String { +impl StringSplice for String { fn splice(&mut self, range: R, s: &str) where R: RangeArgument { if let Some(&start) = range.start() { assert!(self.is_char_boundary(start)); @@ -154,12 +122,14 @@ fn char_boundary() { } ``` -This implementation defends against `ExactSizeIterator::len()` being incorrect. -If `len()` is too high, it reserves more capacity than necessary -and does more copying than necessary, -but stays in linear time. -If `len()` is too low, the algorithm degenerates to quadratic time -using `Vec::insert` for each additional new element. +The elements of the vector after the range first be moved by an offset of +the lower bound of `Iterator::size_hint` minus the length of the range. +Then, depending on the real length of the iterator: + +* If it’s the same as the lower bound, we’re done. +* If it’s lower than the lower bound (which was then incorrect), the elements will be moved once more. +* If it’s higher, the extra iterator items well be collected into a temporary `Vec` + in order to know exactly how many there are, and the elements after will be moved once more. # Drawbacks [drawbacks]: #drawbacks @@ -178,13 +148,8 @@ not every program needs it, and standard library growth has a maintainance cost. # Unresolved questions [unresolved]: #unresolved-questions -* Should the `ExactSizeIterator` bound be removed? - The lower bound of `Iterator::size_hint` could be used instead of `ExactSizeIterator::len`, - but the degenerate quadratic time case would become “normal”. - With `ExactSizeIterator` it only happens when `ExactSizeIterator::len` is incorrect - which means that someone is doing something wrong. - -* Alternatively, should `splice` panic when `ExactSizeIterator::len` is incorrect? +* Should the input iterator be consumed incrementally at each `Splice::next` call, + or only in `Splice::drop`? * It would be nice to be able to `Vec::splice` with a slice without writing `.iter().cloned()` explicitly. @@ -193,9 +158,9 @@ not every program needs it, and standard library growth has a maintainance cost. accept iterators of `&T` as well as iterators of `T`: ```rust - impl<'a, T: 'a> ReplaceVecSlice<&'a T> for Vec where T: Copy { + impl<'a, T: 'a> VecSplice<&'a T> for Vec where T: Copy { fn splice(&mut self, range: R, iterable: I) - where R: RangeArgument, I: IntoIterator, I::IntoIter: ExactSizeIterator + where R: RangeArgument, I: IntoIterator { self.splice(range, iterable.into_iter().cloned()) } @@ -206,12 +171,6 @@ not every program needs it, and standard library growth has a maintainance cost. (By the way, what was the motivation for `Extend` being a trait rather than inherent methods, before RFC 839?) -* The method could return an iterator of the replaced elements. - Nothing would happen when the method is called, - only when the returned iterator is advanced or dropped. - There’s is precedent of this in `Vec::drain`, - though the input iterator being lazily consumed could be surprising. - * If coherence rules and backward-compatibility allow it, this functionality could be added to `Vec::insert` and `String::insert` by overloading them / making them more generic.