Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vec::dedup_by optimization #82191

Merged
merged 7 commits into from
Mar 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/alloc/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#![feature(btree_drain_filter)]
#![feature(map_first_last)]
#![feature(repr_simd)]
#![feature(slice_partition_dedup)]
#![feature(test)]

extern crate test;
Expand Down
89 changes: 89 additions & 0 deletions library/alloc/benches/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -671,3 +671,92 @@ fn bench_map_fast(b: &mut Bencher) {
let data = black_box([(0, 0); LEN]);
b.iter(|| map_fast(&data));
}

// Fill `buf` with deterministic xorshift32 pseudo-random values, then sort it.
//
// The value range widens with the buffer size so that large buffers are not
// made up almost entirely of duplicates:
//   len < 8192    -> values in 0..=0xFF
//   len < 200_000 -> values in 0..=0xFFFF (the threshold is arbitrary; per the
//                    PR discussion it merely keeps the duplicate ratio sane)
//   otherwise     -> the full u32 range
fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
    let mask = if buf.len() < 8192 {
        0xFF
    } else if buf.len() < 200_000 {
        0xFFFF
    } else {
        0xFFFF_FFFF
    };

    for item in buf.iter_mut() {
        // One xorshift32 step (Marsaglia); deterministic for a given seed.
        seed ^= seed << 13;
        seed ^= seed >> 17;
        seed ^= seed << 5;

        *item = seed & mask;
    }

    // `sort_unstable` gives the same result for plain integers and avoids the
    // stable sort's allocation.
    buf.sort_unstable();
}

// Benchmark the pre-optimization `dedup` strategy: `slice::partition_dedup`
// followed by `Vec::truncate`, restoring the input between iterations.
fn bench_vec_dedup_old(b: &mut Bencher, sz: usize) {
    let mut source = vec![0u32; sz];
    b.bytes = std::mem::size_of_val(source.as_slice()) as u64;
    random_sorted_fill(0x43, &mut source);

    let mut work = source.clone();
    b.iter(|| {
        // Old approach: partition duplicates to the back, then cut them off.
        let (kept, _) = work.partition_dedup();
        let kept_len = kept.len();
        work.truncate(kept_len);

        black_box(work.first());
        // Refill so every iteration deduplicates the same data.
        work.clear();
        work.extend_from_slice(&source);
    });
}

// Benchmark the new in-place `Vec::dedup`, restoring the input between
// iterations so each pass deduplicates identical data.
fn bench_vec_dedup_new(b: &mut Bencher, sz: usize) {
    let mut source = vec![0u32; sz];
    b.bytes = std::mem::size_of_val(source.as_slice()) as u64;
    random_sorted_fill(0x43, &mut source);

    let mut work = source.clone();
    b.iter(|| {
        work.dedup();
        black_box(work.first());
        // Refill from the pristine template for the next iteration.
        work.clear();
        work.extend_from_slice(&source);
    });
}

// Size-parameterized instantiations of the two dedup benchmarks above,
// comparing old vs. new `dedup` at 100, 1_000, 10_000 and 100_000 elements.

#[bench]
fn bench_dedup_old_100(b: &mut Bencher) {
    bench_vec_dedup_old(b, 100);
}
#[bench]
fn bench_dedup_new_100(b: &mut Bencher) {
    bench_vec_dedup_new(b, 100);
}

#[bench]
fn bench_dedup_old_1000(b: &mut Bencher) {
    bench_vec_dedup_old(b, 1000);
}
#[bench]
fn bench_dedup_new_1000(b: &mut Bencher) {
    bench_vec_dedup_new(b, 1000);
}

#[bench]
fn bench_dedup_old_10000(b: &mut Bencher) {
    bench_vec_dedup_old(b, 10000);
}
#[bench]
fn bench_dedup_new_10000(b: &mut Bencher) {
    bench_vec_dedup_new(b, 10000);
}

#[bench]
fn bench_dedup_old_100000(b: &mut Bencher) {
    bench_vec_dedup_old(b, 100000);
}
#[bench]
fn bench_dedup_new_100000(b: &mut Bencher) {
    bench_vec_dedup_new(b, 100000);
}
95 changes: 89 additions & 6 deletions library/alloc/src/vec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1512,15 +1512,98 @@ impl<T, A: Allocator> Vec<T, A> {
/// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
/// ```
#[stable(feature = "dedup_by", since = "1.16.0")]
pub fn dedup_by<F>(&mut self, same_bucket: F)
pub fn dedup_by<F>(&mut self, mut same_bucket: F)
where
F: FnMut(&mut T, &mut T) -> bool,
{
let len = {
let (dedup, _) = self.as_mut_slice().partition_dedup_by(same_bucket);
dedup.len()
};
self.truncate(len);
let len = self.len();
if len <= 1 {
return;
}

/* INVARIANT: vec.len() > read >= write > write-1 >= 0 */
nagisa marked this conversation as resolved.
Show resolved Hide resolved
struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> {
/* Offset of the element we want to check if it is duplicate */
read: usize,

/* Offset of the place where we want to place the non-duplicate
* when we find it. */
write: usize,

/* The Vec that would need correction if `same_bucket` panicked */
vec: &'a mut Vec<T, A>,
}

impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> {
fn drop(&mut self) {
/* This code gets executed when `same_bucket` panics */

/* SAFETY: invariant guarantees that `read - write`
* and `len - read` never overflow and that the copy is always
* in-bounds. */
unsafe {
let ptr = self.vec.as_mut_ptr();
let len = self.vec.len();

/* How many items were left when `same_bucket` paniced.
* Basically vec[read..].len() */
let items_left = len.wrapping_sub(self.read);

/* Pointer to first item in vec[write..write+items_left] slice */
let dropped_ptr = ptr.add(self.write);
/* Pointer to first item in vec[read..] slice */
let valid_ptr = ptr.add(self.read);

/* Copy `vec[read..]` to `vec[write..write+items_left]`.
* The slices can overlap, so `copy_nonoverlapping` cannot be used */
ptr::copy(valid_ptr, dropped_ptr, items_left);

/* How many items have been already dropped
* Basically vec[read..write].len() */
let dropped = self.read.wrapping_sub(self.write);

self.vec.set_len(len - dropped);
}
}
}

let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self };
let ptr = gap.vec.as_mut_ptr();

/* Drop items while going through Vec, it should be more efficient than
* doing slice partition_dedup + truncate */

/* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr
* are always in-bounds and read_ptr never aliases prev_ptr */
unsafe {
while gap.read < len {
let read_ptr = ptr.add(gap.read);
nagisa marked this conversation as resolved.
Show resolved Hide resolved
let prev_ptr = ptr.add(gap.write.wrapping_sub(1));
nagisa marked this conversation as resolved.
Show resolved Hide resolved

if same_bucket(&mut *read_ptr, &mut *prev_ptr) {
/* We have found duplicate, drop it in-place */
ptr::drop_in_place(read_ptr);
} else {
let write_ptr = ptr.add(gap.write);

/* Because `read_ptr` can be equal to `write_ptr`, we either
* have to use `copy` or conditional `copy_nonoverlapping`.
* Looks like the first option is faster. */
ptr::copy(read_ptr, write_ptr, 1);
nagisa marked this conversation as resolved.
Show resolved Hide resolved

/* We have filled that place, so go further */
gap.write += 1;
}

gap.read += 1;
}

/* Technically we could let `gap` clean up with its Drop, but
* when `same_bucket` is guaranteed to not panic, this bloats a little
* the codegen, so we just do it manually */
gap.vec.set_len(gap.write);
mem::forget(gap);
}
}

/// Appends an element to the back of a collection.
Expand Down
1 change: 1 addition & 0 deletions library/alloc/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#![feature(int_bits_const)]
#![feature(vecdeque_binary_search)]
#![feature(slice_group_by)]
#![feature(slice_partition_dedup)]
#![feature(vec_extend_from_within)]
#![feature(vec_spare_capacity)]

Expand Down
126 changes: 126 additions & 0 deletions library/alloc/tests/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2102,6 +2102,132 @@ fn test_extend_from_within() {
assert_eq!(v, ["a", "b", "c", "b", "c", "a", "b"]);
}

#[test]
fn test_vec_dedup_by() {
    // `dedup_by` with a custom equivalence: runs of equal absolute values
    // collapse, keeping the first element of each run.
    let mut values: Vec<i32> = vec![1, -1, 2, 3, 1, -5, 5, -2, 2];
    values.dedup_by(|a, b| a.abs() == b.abs());
    assert_eq!(values, [1, 2, 3, 1, -5, -2]);
}

#[test]
fn test_vec_dedup_empty() {
    // `dedup` on an empty vector must be a no-op (the `len <= 1` fast path).
    let mut empty: Vec<i32> = Vec::new();
    empty.dedup();
    assert_eq!(empty, []);
}

#[test]
fn test_vec_dedup_one() {
    // A single element has no neighbor to compare against and is kept as-is.
    let mut single = vec![12i32];
    single.dedup();
    assert_eq!(single, [12]);
}

#[test]
fn test_vec_dedup_multiple_ident() {
    // Long runs of identical values collapse to a single element per run.
    let mut runs = vec![12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11];
    runs.dedup();
    assert_eq!(runs, [12, 11]);
}

#[test]
fn test_vec_dedup_partialeq() {
    // `PartialEq` that only inspects the first field: dedup must keep the
    // *first* element of each equal run, second fields 1 and 7 here.
    #[derive(Debug)]
    struct Pair(i32, i32);

    impl PartialEq for Pair {
        fn eq(&self, other: &Pair) -> bool {
            self.0 == other.0
        }
    }

    let mut pairs = vec![Pair(0, 1), Pair(0, 5), Pair(1, 7), Pair(1, 9)];
    pairs.dedup();
    assert_eq!(pairs, [Pair(0, 1), Pair(1, 7)]);
}

#[test]
fn test_vec_dedup() {
    // Exhaustively cross-check `Vec::dedup` against `slice::partition_dedup`
    // for every 8-element bool pattern.
    let mut vec: Vec<bool> = Vec::with_capacity(8);
    let mut template = vec.clone();

    // Inclusive range so the all-ones pattern (x == 255) is covered too;
    // the previous `0u8..255u8` bound skipped it.
    for x in 0..=255u8 {
        vec.clear();
        template.clear();

        // Expand the byte into its 8 bits, least significant first.
        let iter = (0..8).map(move |bit| (x >> bit) & 1 == 1);
        vec.extend(iter);
        template.extend_from_slice(&vec);

        // `partition_dedup` is the reference implementation here.
        let (dedup, _) = template.partition_dedup();
        vec.dedup();

        assert_eq!(vec, dedup);
    }
}

#[test]
fn test_vec_dedup_panicking() {
#[derive(Debug)]
struct Panic {
drop_counter: &'static AtomicU32,
value: bool,
index: usize,
}

impl PartialEq for Panic {
fn eq(&self, other: &Self) -> bool {
self.value == other.value
}
}

impl Drop for Panic {
fn drop(&mut self) {
let x = self.drop_counter.fetch_add(1, Ordering::SeqCst);
assert!(x != 4);
}
}

static DROP_COUNTER: AtomicU32 = AtomicU32::new(0);
let expected = [
Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
];
let mut vec = vec![
Panic { drop_counter: &DROP_COUNTER, value: false, index: 0 },
// these elements get deduplicated
Panic { drop_counter: &DROP_COUNTER, value: false, index: 1 },
Panic { drop_counter: &DROP_COUNTER, value: false, index: 2 },
Panic { drop_counter: &DROP_COUNTER, value: false, index: 3 },
Panic { drop_counter: &DROP_COUNTER, value: false, index: 4 },
// here it panics
Panic { drop_counter: &DROP_COUNTER, value: false, index: 5 },
Panic { drop_counter: &DROP_COUNTER, value: true, index: 6 },
Panic { drop_counter: &DROP_COUNTER, value: true, index: 7 },
];

let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
vec.dedup();
}));

let ok = vec.iter().zip(expected.iter()).all(|(x, y)| x.index == y.index);

if !ok {
panic!("expected: {:?}\ngot: {:?}\n", expected, vec);
}
}

// Regression test for issue #82533
#[test]
fn test_extend_from_within_panicing_clone() {
Expand Down