Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Vec::dedup_by and Vec::dedup_by_key #36743

Merged
merged 4 commits into from
Oct 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 126 additions & 83 deletions src/libcollections/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,130 @@ impl<T> Vec<T> {
}
}

/// Removes consecutive elements in the vector that resolve to the same key.
///
/// If the vector is sorted, this removes all duplicates.
///
/// # Examples
///
/// ```
/// #![feature(dedup_by)]
///
/// let mut vec = vec![10, 20, 21, 30, 20];
///
/// vec.dedup_by_key(|i| *i / 10);
///
/// assert_eq!(vec, [10, 20, 30, 20]);
/// ```
#[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")]
#[inline]
pub fn dedup_by_key<F, K>(&mut self, mut key: F) where F: FnMut(&mut T) -> K, K: PartialEq {
self.dedup_by(|a, b| key(a) == key(b))
}

/// Removes consecutive elements in the vector that resolve to the same key.
///
/// If the vector is sorted, this removes all duplicates.
///
/// # Examples
///
/// ```
/// #![feature(dedup_by)]
/// use std::ascii::AsciiExt;
///
/// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"];
///
/// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b));
///
/// assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
/// ```
#[unstable(feature = "dedup_by", reason = "recently added", issue = "37087")]
pub fn dedup_by<F>(&mut self, mut same_bucket: F) where F: FnMut(&mut T, &mut T) -> bool {
unsafe {
// Although we have a mutable reference to `self`, we cannot make
// *arbitrary* changes. The `PartialEq` comparisons could panic, so we
// must ensure that the vector is in a valid state at all time.
//
// The way that we handle this is by using swaps; we iterate
// over all the elements, swapping as we go so that at the end
// the elements we wish to keep are in the front, and those we
// wish to reject are at the back. We can then truncate the
// vector. This operation is still O(n).
//
// Example: We start in this state, where `r` represents "next
// read" and `w` represents "next_write`.
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate, so
// we swap self[r] and self[w] (no effect as r==w) and then increment both
// r and w, leaving us with:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this value is a duplicate,
// so we increment `r` but leave everything else unchanged:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate,
// so swap self[r] and self[w] and advance r and w:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 1 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Not a duplicate, repeat:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 3 | 1 | 3 |
// +---+---+---+---+---+---+
// w
//
// Duplicate, advance r. End of vec. Truncate to w.

let ln = self.len();
if ln <= 1 {
return;
}

// Avoid bounds checks by using raw pointers.
let p = self.as_mut_ptr();
let mut r: usize = 1;
let mut w: usize = 1;

while r < ln {
let p_r = p.offset(r as isize);
let p_wm1 = p.offset((w - 1) as isize);
if !same_bucket(&mut *p_r, &mut *p_wm1) {
if r != w {
let p_w = p_wm1.offset(1);
mem::swap(&mut *p_r, &mut *p_w);
}
w += 1;
}
r += 1;
}

self.truncate(w);
}
}

/// Appends an element to the back of a collection.
///
/// # Panics
Expand Down Expand Up @@ -1156,90 +1280,9 @@ impl<T: PartialEq> Vec<T> {
/// assert_eq!(vec, [1, 2, 3, 2]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn dedup(&mut self) {
unsafe {
// Although we have a mutable reference to `self`, we cannot make
// *arbitrary* changes. The `PartialEq` comparisons could panic, so we
// must ensure that the vector is in a valid state at all time.
//
// The way that we handle this is by using swaps; we iterate
// over all the elements, swapping as we go so that at the end
// the elements we wish to keep are in the front, and those we
// wish to reject are at the back. We can then truncate the
// vector. This operation is still O(n).
//
// Example: We start in this state, where `r` represents "next
// read" and `w` represents "next_write`.
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate, so
// we swap self[r] and self[w] (no effect as r==w) and then increment both
// r and w, leaving us with:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this value is a duplicate,
// so we increment `r` but leave everything else unchanged:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 1 | 2 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Comparing self[r] against self[w-1], this is not a duplicate,
// so swap self[r] and self[w] and advance r and w:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 1 | 3 | 3 |
// +---+---+---+---+---+---+
// w
//
// Not a duplicate, repeat:
//
// r
// +---+---+---+---+---+---+
// | 0 | 1 | 2 | 3 | 1 | 3 |
// +---+---+---+---+---+---+
// w
//
// Duplicate, advance r. End of vec. Truncate to w.

let ln = self.len();
if ln <= 1 {
return;
}

// Avoid bounds checks by using raw pointers.
let p = self.as_mut_ptr();
let mut r: usize = 1;
let mut w: usize = 1;

while r < ln {
let p_r = p.offset(r as isize);
let p_wm1 = p.offset((w - 1) as isize);
if *p_r != *p_wm1 {
if r != w {
let p_w = p_wm1.offset(1);
mem::swap(&mut *p_r, &mut *p_w);
}
w += 1;
}
r += 1;
}

self.truncate(w);
}
self.dedup_by(|a, b| a == b)
}
}

Expand Down
1 change: 1 addition & 0 deletions src/libcollectionstest/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#![feature(collections)]
#![feature(collections_bound)]
#![feature(const_fn)]
#![feature(dedup_by)]
#![feature(enumset)]
#![feature(pattern)]
#![feature(rand)]
Expand Down
41 changes: 0 additions & 41 deletions src/libcollectionstest/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,47 +315,6 @@ fn test_clear() {
// If the unsafe block didn't drop things properly, we blow up here.
}

#[test]
fn test_dedup() {
fn case(a: Vec<i32>, b: Vec<i32>) {
let mut v = a;
v.dedup();
assert_eq!(v, b);
}
case(vec![], vec![]);
case(vec![1], vec![1]);
case(vec![1, 1], vec![1]);
case(vec![1, 2, 3], vec![1, 2, 3]);
case(vec![1, 1, 2, 3], vec![1, 2, 3]);
case(vec![1, 2, 2, 3], vec![1, 2, 3]);
case(vec![1, 2, 3, 3], vec![1, 2, 3]);
case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]);
}

#[test]
fn test_dedup_unique() {
let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
v0.dedup();
let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
v1.dedup();
let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
v2.dedup();
// If the boxed pointers were leaked or otherwise misused, valgrind
// and/or rt should raise errors.
}

#[test]
fn test_dedup_shared() {
let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
v0.dedup();
let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
v1.dedup();
let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
v2.dedup();
// If the pointers were leaked or otherwise misused, valgrind and/or
// rt should raise errors.
}

#[test]
fn test_retain() {
let mut v = vec![1, 2, 3, 4, 5];
Expand Down
55 changes: 55 additions & 0 deletions src/libcollectionstest/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::ascii::AsciiExt;
use std::borrow::Cow;
use std::iter::{FromIterator, repeat};
use std::mem::size_of;
Expand Down Expand Up @@ -213,6 +214,60 @@ fn test_retain() {
assert_eq!(vec, [2, 4]);
}

#[test]
fn test_dedup() {
fn case(a: Vec<i32>, b: Vec<i32>) {
let mut v = a;
v.dedup();
assert_eq!(v, b);
}
case(vec![], vec![]);
case(vec![1], vec![1]);
case(vec![1, 1], vec![1]);
case(vec![1, 2, 3], vec![1, 2, 3]);
case(vec![1, 1, 2, 3], vec![1, 2, 3]);
case(vec![1, 2, 2, 3], vec![1, 2, 3]);
case(vec![1, 2, 3, 3], vec![1, 2, 3]);
case(vec![1, 1, 2, 2, 2, 3, 3], vec![1, 2, 3]);
}

#[test]
fn test_dedup_by_key() {
fn case(a: Vec<i32>, b: Vec<i32>) {
let mut v = a;
v.dedup_by_key(|i| *i / 10);
assert_eq!(v, b);
}
case(vec![], vec![]);
case(vec![10], vec![10]);
case(vec![10, 11], vec![10]);
case(vec![10, 20, 30], vec![10, 20, 30]);
case(vec![10, 11, 20, 30], vec![10, 20, 30]);
case(vec![10, 20, 21, 30], vec![10, 20, 30]);
case(vec![10, 20, 30, 31], vec![10, 20, 30]);
case(vec![10, 11, 20, 21, 22, 30, 31], vec![10, 20, 30]);
}

#[test]
fn test_dedup_by() {
let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"];
vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b));

assert_eq!(vec, ["foo", "bar", "baz", "bar"]);
}

#[test]
fn test_dedup_unique() {
let mut v0: Vec<Box<_>> = vec![box 1, box 1, box 2, box 3];
v0.dedup();
let mut v1: Vec<Box<_>> = vec![box 1, box 2, box 2, box 3];
v1.dedup();
let mut v2: Vec<Box<_>> = vec![box 1, box 2, box 3, box 3];
v2.dedup();
// If the boxed pointers were leaked or otherwise misused, valgrind
// and/or rt should raise errors.
}

#[test]
fn zero_sized_values() {
let mut v = Vec::new();
Expand Down