Skip to content

Commit f39ba69

Browse files
committed
auto merge of #13539 : Aatch/rust/vector-copy-faster, r=thestinger
LLVM wasn't recognising the loops as memcpy loops and was therefore failing to optimise them properly. While improving LLVM is the "proper" way to fix this, I think that these cases are important enough to warrant a little low-level optimisation.

Fixes #13472

r? @thestinger

---

Benchmark Results:

```
--- Before ---
test clone_owned ... bench: 6126104 ns/iter (+/- 285962) = 170 MB/s
test clone_owned_to_owned ... bench: 6125054 ns/iter (+/- 271197) = 170 MB/s
test clone_str ... bench: 80586 ns/iter (+/- 11489) = 13011 MB/s
test clone_vec ... bench: 3903220 ns/iter (+/- 658556) = 268 MB/s
test test_memcpy ... bench: 69401 ns/iter (+/- 2168) = 15108 MB/s

--- After ---
test clone_owned ... bench: 70839 ns/iter (+/- 4931) = 14801 MB/s
test clone_owned_to_owned ... bench: 70286 ns/iter (+/- 4836) = 14918 MB/s
test clone_str ... bench: 78519 ns/iter (+/- 5511) = 13353 MB/s
test clone_vec ... bench: 71415 ns/iter (+/- 1999) = 14682 MB/s
test test_memcpy ... bench: 70980 ns/iter (+/- 2126) = 14772 MB/s
```
2 parents 61f788c + be334d5 commit f39ba69

File tree

2 files changed

+38
-5
lines changed

2 files changed

+38
-5
lines changed

src/libstd/slice.rs

+21-4
Original file line numberDiff line numberDiff line change
@@ -760,9 +760,25 @@ impl<'a, T: Clone> CloneableVector<T> for &'a [T] {
760760
/// Returns a copy of `v`.
761761
#[inline]
762762
fn to_owned(&self) -> ~[T] {
763-
let mut result = with_capacity(self.len());
764-
for e in self.iter() {
765-
result.push((*e).clone());
763+
let len = self.len();
764+
let mut result = with_capacity(len);
765+
// Unsafe code so this can be optimised to a memcpy (or something
766+
// similarly fast) when T is Copy. LLVM is easily confused, so any
767+
// extra operations during the loop can prevent this optimisation
768+
unsafe {
769+
let mut i = 0;
770+
let p = result.as_mut_ptr();
771+
// Use try_finally here otherwise the write to length
772+
// inside the loop stops LLVM from optimising this.
773+
try_finally(
774+
&mut i, (),
775+
|i, ()| while *i < len {
776+
mem::move_val_init(
777+
&mut(*p.offset(*i as int)),
778+
self.unsafe_ref(*i).clone());
779+
*i += 1;
780+
},
781+
|i| result.set_len(*i));
766782
}
767783
result
768784
}
@@ -2584,7 +2600,8 @@ pub mod bytes {
25842600
impl<A: Clone> Clone for ~[A] {
25852601
#[inline]
25862602
fn clone(&self) -> ~[A] {
2587-
self.iter().map(|item| item.clone()).collect()
2603+
// Use the fast to_owned on &[A] for cloning
2604+
self.as_slice().to_owned()
25882605
}
25892606

25902607
fn clone_from(&mut self, source: &~[A]) {

src/libstd/vec.rs

+17-1
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,23 @@ impl<T: Clone> Vec<T> {
311311

312312
impl<T:Clone> Clone for Vec<T> {
313313
fn clone(&self) -> Vec<T> {
314-
self.iter().map(|x| x.clone()).collect()
314+
let len = self.len;
315+
let mut vector = Vec::with_capacity(len);
316+
// Unsafe code so this can be optimised to a memcpy (or something
317+
// similarly fast) when T is Copy. LLVM is easily confused, so any
318+
// extra operations during the loop can prevent this optimisation
319+
{
320+
let this_slice = self.as_slice();
321+
while vector.len < len {
322+
unsafe {
323+
mem::move_val_init(
324+
vector.as_mut_slice().unsafe_mut_ref(vector.len),
325+
this_slice.unsafe_ref(vector.len).clone());
326+
}
327+
vector.len += 1;
328+
}
329+
}
330+
vector
315331
}
316332

317333
fn clone_from(&mut self, other: &Vec<T>) {

0 commit comments

Comments
 (0)