From dae149cd8e8c0d1cf26fb4cf0b5ab84337bf38e0 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Tue, 19 Feb 2019 01:01:06 +0100 Subject: [PATCH 01/13] improve worst-case performance of BTreeSet intersection --- src/liballoc/collections/btree/set.rs | 110 ++++++++++++++++++++++---- src/liballoc/tests/btree/set.rs | 13 +++ 2 files changed, 107 insertions(+), 16 deletions(-) diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 870e3e47692b0..1f0bf99db3b86 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -155,6 +155,29 @@ impl fmt::Debug for SymmetricDifference<'_, T> { } } +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug)] +enum IntersectionOther<'a, T> { + ITER(Peekable>), + SET(&'a BTreeSet), +} + +/// Whether the sizes of two sets are roughly the same order of magnitude. +/// +/// If they are, or if either set is empty, then their intersection +/// is efficiently calculated by iterating both sets jointly. +/// If they aren't, then it is more scalable to iterate over the small set +/// and find matches in the large set (except if the largest element in +/// the small set hardly surpasses the smallest element in the large set). +fn are_proportionate_for_intersection(len1: usize, len2: usize) -> bool { + let (small, large) = if len1 <= len2 { + (len1, len2) + } else { + (len2, len1) + }; + (large >> 7) <= small +} + /// A lazy iterator producing elements in the intersection of `BTreeSet`s. /// /// This `struct` is created by the [`intersection`] method on [`BTreeSet`]. @@ -165,7 +188,7 @@ impl fmt::Debug for SymmetricDifference<'_, T> { #[stable(feature = "rust1", since = "1.0.0")] pub struct Intersection<'a, T: 'a> { a: Peekable>, - b: Peekable>, + b: IntersectionOther<'a, T>, } #[stable(feature = "collection_debug", since = "1.17.0")] @@ -326,9 +349,21 @@ impl BTreeSet { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn intersection<'a>(&'a self, other: &'a BTreeSet) -> Intersection<'a, T> { - Intersection { - a: self.iter().peekable(), - b: other.iter().peekable(), + if are_proportionate_for_intersection(self.len(), other.len()) { + Intersection { + a: self.iter().peekable(), + b: IntersectionOther::ITER(other.iter().peekable()), + } + } else if self.len() <= other.len() { + Intersection { + a: self.iter().peekable(), + b: IntersectionOther::SET(&other), + } + } else { + Intersection { + a: other.iter().peekable(), + b: IntersectionOther::SET(&self), + } } } @@ -1069,6 +1104,15 @@ impl<'a, T: Ord> Iterator for SymmetricDifference<'a, T> { #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for SymmetricDifference<'_, T> {} +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T> Clone for IntersectionOther<'a, T> { + fn clone(&self) -> IntersectionOther<'a, T> { + match self { + IntersectionOther::ITER(ref iter) => IntersectionOther::ITER(iter.clone()), + IntersectionOther::SET(set) => IntersectionOther::SET(set), + } + } +} #[stable(feature = "rust1", since = "1.0.0")] impl<'a, T> Clone for Intersection<'a, T> { fn clone(&self) -> Intersection<'a, T> { @@ -1083,24 +1127,40 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> { type Item = &'a T; fn next(&mut self) -> Option<&'a T> { - loop { - match Ord::cmp(self.a.peek()?, self.b.peek()?) { - Less => { - self.a.next(); - } - Equal => { - self.b.next(); - return self.a.next(); + match self.b { + IntersectionOther::ITER(ref mut self_b) => loop { + match Ord::cmp(self.a.peek()?, self_b.peek()?) { + Less => { + self.a.next(); + } + Equal => { + self_b.next(); + return self.a.next(); + } + Greater => { + self_b.next(); + } } - Greater => { - self.b.next(); + }, + IntersectionOther::SET(set) => loop { + match self.a.next() { + None => return None, + Some(e) => { + if set.contains(&e) { + return Some(e); + } + } } - } + }, } } fn size_hint(&self) -> (usize, Option) { - (0, Some(min(self.a.len(), self.b.len()))) + let b_len = match self.b { + IntersectionOther::ITER(ref iter) => iter.len(), + IntersectionOther::SET(set) => set.len(), + }; + (0, Some(min(self.a.len(), b_len))) } } @@ -1140,3 +1200,21 @@ impl<'a, T: Ord> Iterator for Union<'a, T> { #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for Union<'_, T> {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_are_proportionate_for_intersection() { + assert!(are_proportionate_for_intersection(0, 0)); + assert!(are_proportionate_for_intersection(0, 127)); + assert!(!are_proportionate_for_intersection(0, 128)); + assert!(are_proportionate_for_intersection(1, 255)); + assert!(!are_proportionate_for_intersection(1, 256)); + assert!(are_proportionate_for_intersection(127, 0)); + assert!(!are_proportionate_for_intersection(128, 0)); + assert!(are_proportionate_for_intersection(255, 1)); + assert!(!are_proportionate_for_intersection(256, 1)); + } +} diff --git a/src/liballoc/tests/btree/set.rs b/src/liballoc/tests/btree/set.rs index 4f5168f1ce572..a98e08e0d7ebb 100644 --- a/src/liballoc/tests/btree/set.rs +++ b/src/liballoc/tests/btree/set.rs @@ -69,6 +69,19 @@ fn test_intersection() { check_intersection(&[11, 1, 3, 77, 103, 5, -5], &[2, 11, 77, -9, -42, 5, 3], &[3, 5, 11, 77]); + + let mut large = [0i32; 512]; + for i in 0..512 { + large[i] = i as i32 + } + check_intersection(&large[..], &[], &[]); + check_intersection(&large[..], &[-1], &[]); + check_intersection(&large[..], &[42], &[42]); + check_intersection(&large[..], &[4, 2], &[2, 4]); + check_intersection(&[], &large[..], &[]); + check_intersection(&[-1], &large[..], &[]); + check_intersection(&[42], &large[..], &[42]); + check_intersection(&[4, 2], &large[..], &[2, 4]); } #[test] From fab1fa925320f785324e4be94a724dd8f2dec3b6 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Tue, 19 Feb 2019 17:11:05 +0100 Subject: [PATCH 02/13] remove the #[stable] thingy --- src/liballoc/collections/btree/set.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 1f0bf99db3b86..88d4f830ecfed 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -155,7 +155,6 @@ impl fmt::Debug for SymmetricDifference<'_, T> { } } -#[stable(feature = "rust1", since = "1.0.0")] #[derive(Debug)] enum IntersectionOther<'a, T> { ITER(Peekable>), From f536f7aec0128568a9ee4a2244b645518d249781 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Tue, 19 Feb 2019 17:47:06 +0100 Subject: [PATCH 03/13] remove another #[stable] thingy and superfluous comma's --- src/liballoc/collections/btree/set.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 88d4f830ecfed..6466308bea258 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -1103,7 +1103,6 @@ impl<'a, T: Ord> Iterator for SymmetricDifference<'a, T> { #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for SymmetricDifference<'_, T> {} -#[stable(feature = "rust1", since = "1.0.0")] impl<'a, T> Clone for IntersectionOther<'a, T> { fn clone(&self) -> IntersectionOther<'a, T> { match self { @@ -1140,7 +1139,7 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> { self_b.next(); } } - }, + } IntersectionOther::SET(set) => loop { match self.a.next() { None => return None, @@ -1150,7 +1149,7 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> { } } } - }, + } } } From 0186d9002f388f344a8a18f469291c6d4644aa64 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Tue, 19 Feb 2019 22:38:09 +0100 Subject: [PATCH 04/13] renamed enum variants, moved struct declaration down, condensed Intersection.next --- src/liballoc/collections/btree/set.rs | 40 ++++++++++++--------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 6466308bea258..e1a25366735bf 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -155,12 +155,6 @@ impl fmt::Debug for SymmetricDifference<'_, T> { } } -#[derive(Debug)] -enum IntersectionOther<'a, T> { - ITER(Peekable>), - SET(&'a BTreeSet), -} - /// Whether the sizes of two sets are roughly the same order of magnitude. /// /// If they are, or if either set is empty, then their intersection @@ -190,6 +184,12 @@ pub struct Intersection<'a, T: 'a> { b: IntersectionOther<'a, T>, } +#[derive(Debug)] +enum IntersectionOther<'a, T> { + Stitch(Peekable>), + Search(&'a BTreeSet), +} + #[stable(feature = "collection_debug", since = "1.17.0")] impl fmt::Debug for Intersection<'_, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -351,17 +351,17 @@ impl BTreeSet { if are_proportionate_for_intersection(self.len(), other.len()) { Intersection { a: self.iter().peekable(), - b: IntersectionOther::ITER(other.iter().peekable()), + b: IntersectionOther::Stitch(other.iter().peekable()), } } else if self.len() <= other.len() { Intersection { a: self.iter().peekable(), - b: IntersectionOther::SET(&other), + b: IntersectionOther::Search(&other), } } else { Intersection { a: other.iter().peekable(), - b: IntersectionOther::SET(&self), + b: IntersectionOther::Search(&self), } } } @@ -1106,8 +1106,8 @@ impl FusedIterator for SymmetricDifference<'_, T> {} impl<'a, T> Clone for IntersectionOther<'a, T> { fn clone(&self) -> IntersectionOther<'a, T> { match self { - IntersectionOther::ITER(ref iter) => IntersectionOther::ITER(iter.clone()), - IntersectionOther::SET(set) => IntersectionOther::SET(set), + IntersectionOther::Stitch(ref iter) => IntersectionOther::Stitch(iter.clone()), + IntersectionOther::Search(set) => IntersectionOther::Search(set), } } } @@ -1126,7 +1126,7 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> { fn next(&mut self) -> Option<&'a T> { match self.b { - IntersectionOther::ITER(ref mut self_b) => loop { + IntersectionOther::Stitch(ref mut self_b) => loop { match Ord::cmp(self.a.peek()?, self_b.peek()?) { Less => { self.a.next(); @@ -1140,14 +1140,10 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> { } } } - IntersectionOther::SET(set) => loop { - match self.a.next() { - None => return None, - Some(e) => { - if set.contains(&e) { - return Some(e); - } - } + IntersectionOther::Search(set) => loop { + let e = self.a.next()?; + if set.contains(&e) { + return Some(e); } } } @@ -1155,8 +1151,8 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> { fn size_hint(&self) -> (usize, Option) { let b_len = match self.b { - IntersectionOther::ITER(ref iter) => iter.len(), - IntersectionOther::SET(set) => set.len(), + IntersectionOther::Stitch(ref iter) => iter.len(), + IntersectionOther::Search(set) => set.len(), }; (0, Some(min(self.a.len(), b_len))) } From 6553b0d5656cc4f5357a4e853d8bda4e07ed0600 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Fri, 22 Feb 2019 14:46:06 +0100 Subject: [PATCH 05/13] get rid of the refs suggested by rustc --- src/liballoc/collections/btree/set.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index ff502b4c38f9d..8634913cdceb3 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -1105,8 +1105,8 @@ impl FusedIterator for SymmetricDifference<'_, T> {} impl<'a, T> Clone for IntersectionOther<'a, T> { fn clone(&self) -> IntersectionOther<'a, T> { - match self { - IntersectionOther::Stitch(ref iter) => IntersectionOther::Stitch(iter.clone()), + match &self { + IntersectionOther::Stitch(iter) => IntersectionOther::Stitch(iter.clone()), IntersectionOther::Search(set) => IntersectionOther::Search(set), } } @@ -1125,8 +1125,8 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> { type Item = &'a T; fn next(&mut self) -> Option<&'a T> { - match self.b { - IntersectionOther::Stitch(ref mut self_b) => loop { + match &mut self.b { + IntersectionOther::Stitch(self_b) => loop { match Ord::cmp(self.a.peek()?, self_b.peek()?) { Less => { self.a.next(); From 077ecd43a5ec9f9ab72251e13ae2bdd675deabb4 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Sun, 24 Feb 2019 00:51:43 +0100 Subject: [PATCH 06/13] speed up BTreeSet intersection: skip elements below the other set's minimum --- src/liballoc/collections/btree/set.rs | 91 ++++++++++++++++----------- 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 8634913cdceb3..0d8af1ddafd36 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -3,7 +3,7 @@ use core::borrow::Borrow; use core::cmp::Ordering::{self, Less, Greater, Equal}; -use core::cmp::{min, max}; +use core::cmp::max; use core::fmt::{self, Debug}; use core::iter::{Peekable, FromIterator, FusedIterator}; use core::ops::{BitOr, BitAnd, BitXor, Sub, RangeBounds}; @@ -180,13 +180,14 @@ fn are_proportionate_for_intersection(len1: usize, len2: usize) -> bool { /// [`intersection`]: struct.BTreeSet.html#method.intersection #[stable(feature = "rust1", since = "1.0.0")] pub struct Intersection<'a, T: 'a> { - a: Peekable>, + a: Range<'a, T>, b: IntersectionOther<'a, T>, + max_size: usize, } #[derive(Debug)] enum IntersectionOther<'a, T> { - Stitch(Peekable>), + Stitch(Range<'a, T>), Search(&'a BTreeSet), } @@ -196,6 +197,7 @@ impl fmt::Debug for Intersection<'_, T> { f.debug_tuple("Intersection") .field(&self.a) .field(&self.b) + .field(&self.max_size) .finish() } } @@ -348,20 +350,43 @@ impl BTreeSet { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn intersection<'a>(&'a self, other: &'a BTreeSet) -> Intersection<'a, T> { - if are_proportionate_for_intersection(self.len(), other.len()) { - Intersection { - a: self.iter().peekable(), - b: IntersectionOther::Stitch(other.iter().peekable()), - } - } else if self.len() <= other.len() { + let (a_set, b_set) = if self.len() <= other.len() { + (self, other) + } else { + (other, self) + }; + if a_set.is_empty() { Intersection { - a: self.iter().peekable(), - b: IntersectionOther::Search(&other), + a: a_set.range(..), + b: IntersectionOther::Search(b_set), + max_size: a_set.len(), } } else { - Intersection { - a: other.iter().peekable(), - b: IntersectionOther::Search(&self), + let a_min = a_set.iter().next().unwrap(); + let b_min = b_set.iter().next().unwrap(); + let ord = Ord::cmp(a_min, b_min); + let a_range = if ord == Less { + a_set.range(b_min..) + } else { + a_set.range(..) + }; + if are_proportionate_for_intersection(self.len(), other.len()) { + let b_range = if ord == Greater { + b_set.range(a_min..) + } else { + b_set.range(..) + }; + Intersection { + a: a_range, + b: IntersectionOther::Stitch(b_range), + max_size: a_set.len(), + } + } else { + Intersection { + a: a_range, + b: IntersectionOther::Search(b_set), + max_size: a_set.len(), + } } } } @@ -1106,7 +1131,7 @@ impl FusedIterator for SymmetricDifference<'_, T> {} impl<'a, T> Clone for IntersectionOther<'a, T> { fn clone(&self) -> IntersectionOther<'a, T> { match &self { - IntersectionOther::Stitch(iter) => IntersectionOther::Stitch(iter.clone()), + IntersectionOther::Stitch(range) => IntersectionOther::Stitch(range.clone()), IntersectionOther::Search(set) => IntersectionOther::Search(set), } } @@ -1117,6 +1142,7 @@ impl Clone for Intersection<'_, T> { Intersection { a: self.a.clone(), b: self.b.clone(), + max_size: self.max_size, } } } @@ -1126,35 +1152,28 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> { fn next(&mut self) -> Option<&'a T> { match &mut self.b { - IntersectionOther::Stitch(self_b) => loop { - match Ord::cmp(self.a.peek()?, self_b.peek()?) { - Less => { - self.a.next(); - } - Equal => { - self_b.next(); - return self.a.next(); - } - Greater => { - self_b.next(); + IntersectionOther::Stitch(self_b) => { + let mut a_elt = self.a.next()?; + let mut b_elt = self_b.next()?; + loop { + match Ord::cmp(a_elt, b_elt) { + Less => a_elt = self.a.next()?, + Equal => return Some(a_elt), + Greater => b_elt = self_b.next()?, } } } - IntersectionOther::Search(set) => loop { - let e = self.a.next()?; - if set.contains(&e) { - return Some(e); + IntersectionOther::Search(b_set) => loop { + let a_elt = self.a.next()?; + if b_set.contains(&a_elt) { + return Some(a_elt); } - } + }, } } fn size_hint(&self) -> (usize, Option) { - let b_len = match self.b { - IntersectionOther::Stitch(ref iter) => iter.len(), - IntersectionOther::Search(set) => set.len(), - }; - (0, Some(min(self.a.len(), b_len))) + (0, Some(self.max_size)) } } From f35c950c018812275c75bf8533da8e9bfb3794d1 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Tue, 26 Feb 2019 00:23:04 +0100 Subject: [PATCH 07/13] BTreeSet intersection: lower and benchmark the threshold for the new strategy --- src/liballoc/benches/btree/set.rs | 121 ++++++++++++++++---------- src/liballoc/benches/lib.rs | 1 + src/liballoc/collections/btree/set.rs | 103 ++++++++++------------ 3 files changed, 123 insertions(+), 102 deletions(-) diff --git a/src/liballoc/benches/btree/set.rs b/src/liballoc/benches/btree/set.rs index 08e1db5fbb74d..ef618e7ee9286 100644 --- a/src/liballoc/benches/btree/set.rs +++ b/src/liballoc/benches/btree/set.rs @@ -3,34 +3,32 @@ use std::collections::BTreeSet; use rand::{thread_rng, Rng}; use test::{black_box, Bencher}; -fn random(n1: u32, n2: u32) -> [BTreeSet; 2] { +fn random(n1: usize, n2: usize) -> [BTreeSet; 2] { let mut rng = thread_rng(); - let mut set1 = BTreeSet::new(); - let mut set2 = BTreeSet::new(); - for _ in 0..n1 { - let i = rng.gen::(); - set1.insert(i); - } - for _ in 0..n2 { - let i = rng.gen::(); - set2.insert(i); + let mut sets = [BTreeSet::new(), BTreeSet::new()]; + for i in 0..2 { + while sets[i].len() < [n1,n2][i] { + sets[i].insert(rng.gen()); + } } - [set1, set2] + assert_eq!(sets[0].len(), n1); + assert_eq!(sets[1].len(), n2); + sets } -fn staggered(n1: u32, n2: u32) -> [BTreeSet; 2] { - let mut even = BTreeSet::new(); - let mut odd = BTreeSet::new(); - for i in 0..n1 { - even.insert(i * 2); - } - for i in 0..n2 { - odd.insert(i * 2 + 1); +fn stagger(n1: usize, factor: usize) -> [BTreeSet; 2] { + let n2 = n1 * factor; + let mut sets = [BTreeSet::new(), BTreeSet::new()]; + for i in 0..(n1+n2) { + let b = i % (factor + 1) != 0; + sets[b as usize].insert(i as u32); } - [even, odd] + assert_eq!(sets[0].len(), n1); + assert_eq!(sets[1].len(), n2); + sets } -fn neg_vs_pos(n1: u32, n2: u32) -> [BTreeSet; 2] { +fn neg_vs_pos(n1: usize, n2: usize) -> [BTreeSet; 2] { let mut neg = BTreeSet::new(); let mut pos = BTreeSet::new(); for i in -(n1 as i32)..=-1 { @@ -39,19 +37,17 @@ fn neg_vs_pos(n1: u32, n2: u32) -> [BTreeSet; 2] { for i in 1..=(n2 as i32) { pos.insert(i); } + assert_eq!(neg.len(), n1); + assert_eq!(pos.len(), n2); [neg, pos] } -fn pos_vs_neg(n1: u32, n2: u32) -> [BTreeSet; 2] { - let mut neg = BTreeSet::new(); - let mut pos = BTreeSet::new(); - for i in -(n1 as i32)..=-1 { - neg.insert(i); - } - for i in 1..=(n2 as i32) { - pos.insert(i); - } - [pos, neg] +fn pos_vs_neg(n1: usize, n2: usize) -> [BTreeSet; 2] { + let mut sets = neg_vs_pos(n2, n1); + sets.reverse(); + assert_eq!(sets[0].len(), n1); + assert_eq!(sets[1].len(), n2); + sets } macro_rules! set_intersection_bench { @@ -68,21 +64,52 @@ macro_rules! set_intersection_bench { }) } }; + ($name: ident, $sets: expr, $intersection_kind: ident) => { + #[bench] + pub fn $name(b: &mut Bencher) { + // setup + let sets = $sets; + assert!(sets[0].len() >= 1); + assert!(sets[1].len() >= sets[0].len()); + + // measure + b.iter(|| { + let x = BTreeSet::$intersection_kind(&sets[0], &sets[1]).count(); + black_box(x); + }) + } + }; } -set_intersection_bench! {intersect_random_100, random(100, 100)} -set_intersection_bench! {intersect_random_10k, random(10_000, 10_000)} -set_intersection_bench! {intersect_random_10_vs_10k, random(10, 10_000)} -set_intersection_bench! {intersect_random_10k_vs_10, random(10_000, 10)} -set_intersection_bench! {intersect_staggered_100, staggered(100, 100)} -set_intersection_bench! {intersect_staggered_10k, staggered(10_000, 10_000)} -set_intersection_bench! {intersect_staggered_10_vs_10k, staggered(10, 10_000)} -set_intersection_bench! {intersect_staggered_10k_vs_10, staggered(10_000, 10)} -set_intersection_bench! {intersect_neg_vs_pos_100, neg_vs_pos(100, 100)} -set_intersection_bench! {intersect_neg_vs_pos_10k, neg_vs_pos(10_000, 10_000)} -set_intersection_bench! {intersect_neg_vs_pos_10_vs_10k,neg_vs_pos(10, 10_000)} -set_intersection_bench! {intersect_neg_vs_pos_10k_vs_10,neg_vs_pos(10_000, 10)} -set_intersection_bench! {intersect_pos_vs_neg_100, pos_vs_neg(100, 100)} -set_intersection_bench! {intersect_pos_vs_neg_10k, pos_vs_neg(10_000, 10_000)} -set_intersection_bench! {intersect_pos_vs_neg_10_vs_10k,pos_vs_neg(10, 10_000)} -set_intersection_bench! {intersect_pos_vs_neg_10k_vs_10,pos_vs_neg(10_000, 10)} +set_intersection_bench! {intersect_neg_vs_pos_100, neg_vs_pos(100, 100)} +set_intersection_bench! {intersect_neg_vs_pos_10k, neg_vs_pos(10_000, 10_000)} +set_intersection_bench! {intersect_neg_vs_pos_10_vs_10k, neg_vs_pos(10, 10_000)} +set_intersection_bench! {intersect_neg_vs_pos_10k_vs_10, neg_vs_pos(10_000, 10)} +set_intersection_bench! {intersect_pos_vs_neg_100, pos_vs_neg(100, 100)} +set_intersection_bench! {intersect_pos_vs_neg_10k, pos_vs_neg(10_000, 10_000)} +set_intersection_bench! {intersect_pos_vs_neg_10_vs_10k, pos_vs_neg(10, 10_000)} +set_intersection_bench! {intersect_pos_vs_neg_10k_vs_10, pos_vs_neg(10_000, 10)} +set_intersection_bench! {intersect_random_100, random(100, 100)} +set_intersection_bench! {intersect_random_10k, random(10_000, 10_000)} +set_intersection_bench! {intersect_random_10_vs_10k, random(10, 10_000)} +set_intersection_bench! {intersect_random_10k_vs_10, random(10_000, 10)} +set_intersection_bench! {intersect_stagger_10k, stagger(10_000, 1)} +set_intersection_bench! {intersect_stagger_100, stagger(100, 1)} +set_intersection_bench! {intersect_stagger_100_df1, stagger(100, 1 << 1)} +set_intersection_bench! {intersect_stagger_100_df1_stitch, stagger(100, 1 << 1), intersection_stitch} +set_intersection_bench! {intersect_stagger_100_df1_search, stagger(100, 1 << 1), intersection_search} +set_intersection_bench! {intersect_stagger_100_df2, stagger(100, 1 << 2)} +set_intersection_bench! {intersect_stagger_100_df2_stitch, stagger(100, 1 << 2), intersection_stitch} +set_intersection_bench! {intersect_stagger_100_df2_search, stagger(100, 1 << 2), intersection_search} +set_intersection_bench! {intersect_stagger_100_df3, stagger(100, 1 << 3)} +set_intersection_bench! {intersect_stagger_100_df3_stitch, stagger(100, 1 << 3), intersection_stitch} +set_intersection_bench! {intersect_stagger_100_df3_search, stagger(100, 1 << 3), intersection_search} +set_intersection_bench! {intersect_stagger_100_df4, stagger(100, 1 << 4)} +set_intersection_bench! {intersect_stagger_100_df4_stitch, stagger(100, 1 << 4), intersection_stitch} +set_intersection_bench! {intersect_stagger_100_df4_search, stagger(100, 1 << 4), intersection_search} +set_intersection_bench! {intersect_stagger_100_df5, stagger(100, 1 << 5)} +set_intersection_bench! {intersect_stagger_100_df5_stitch, stagger(100, 1 << 5), intersection_stitch} +set_intersection_bench! {intersect_stagger_100_df5_search, stagger(100, 1 << 5), intersection_search} +set_intersection_bench! {intersect_stagger_100_df6, stagger(100, 1 << 6)} +set_intersection_bench! {intersect_stagger_100_df6_stitch, stagger(100, 1 << 6), intersection_stitch} +set_intersection_bench! {intersect_stagger_100_df6_search, stagger(100, 1 << 6), intersection_search} diff --git a/src/liballoc/benches/lib.rs b/src/liballoc/benches/lib.rs index a1884b7d54852..7775b2c704c2e 100644 --- a/src/liballoc/benches/lib.rs +++ b/src/liballoc/benches/lib.rs @@ -1,5 +1,6 @@ #![feature(repr_simd)] #![feature(test)] +#![feature(benches_btree_set)] extern crate rand; extern crate rand_xorshift; diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 0d8af1ddafd36..48d108227a1a5 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -158,17 +158,19 @@ impl fmt::Debug for SymmetricDifference<'_, T> { /// Whether the sizes of two sets are roughly the same order of magnitude. /// /// If they are, or if either set is empty, then their intersection -/// is efficiently calculated by iterating both sets jointly. +/// is efficiently calculated by iterating the common range of both sets jointly. /// If they aren't, then it is more scalable to iterate over the small set -/// and find matches in the large set (except if the largest element in -/// the small set hardly surpasses the smallest element in the large set). -fn are_proportionate_for_intersection(len1: usize, len2: usize) -> bool { - let (small, large) = if len1 <= len2 { - (len1, len2) - } else { - (len2, len1) - }; - (large >> 7) <= small +/// and find matches in the large set. +const fn are_proportionate_for_intersection(small: usize, large: usize) -> bool { + (large >> 4) < small +} +struct _AssertProportionateForIntersection { + a: [(); are_proportionate_for_intersection(1, 15) as usize - 1], + b: [(); !are_proportionate_for_intersection(1, 16) as usize - 1], + c: [(); are_proportionate_for_intersection(2, 31) as usize - 1], + d: [(); !are_proportionate_for_intersection(2, 32) as usize - 1], + e: [(); are_proportionate_for_intersection(3, 47) as usize - 1], + f: [(); !are_proportionate_for_intersection(3, 48) as usize - 1], } /// A lazy iterator producing elements in the intersection of `BTreeSet`s. @@ -359,35 +361,44 @@ impl BTreeSet { Intersection { a: a_set.range(..), b: IntersectionOther::Search(b_set), - max_size: a_set.len(), + max_size: 0, } + } else if are_proportionate_for_intersection(a_set.len(), b_set.len()) { + Self::intersection_stitch(a_set, b_set) } else { - let a_min = a_set.iter().next().unwrap(); - let b_min = b_set.iter().next().unwrap(); - let ord = Ord::cmp(a_min, b_min); - let a_range = if ord == Less { - a_set.range(b_min..) - } else { - a_set.range(..) - }; - if are_proportionate_for_intersection(self.len(), other.len()) { - let b_range = if ord == Greater { - b_set.range(a_min..) - } else { - b_set.range(..) - }; - Intersection { - a: a_range, - b: IntersectionOther::Stitch(b_range), - max_size: a_set.len(), - } - } else { - Intersection { - a: a_range, - b: IntersectionOther::Search(b_set), - max_size: a_set.len(), - } - } + Self::intersection_search(a_set, b_set) + } + } + #[doc(hidden)] + #[unstable(feature = "benches_btree_set", reason = "allow benchmarking for pull #58577", issue = "0")] + pub fn intersection_stitch<'a>(a_set: &'a BTreeSet, b_set: &'a BTreeSet) -> Intersection<'a, T> { + let a_min = a_set.iter().next().unwrap(); + let b_min = b_set.iter().next().unwrap(); + let (a_range, b_range) = match Ord::cmp(a_min, b_min) { + Less => (a_set.range(b_min..), b_set.range(..)), + Equal => (a_set.range(..), b_set.range(..)), + Greater => (a_set.range(..), b_set.range(a_min..)), + }; + Intersection { + a: a_range, + b: IntersectionOther::Stitch(b_range), + max_size: a_set.len(), + } + } + #[doc(hidden)] + #[unstable(feature = "benches_btree_set", reason = "allow benchmarking for pull #58577", issue = "0")] + pub fn intersection_search<'a>(a_set: &'a BTreeSet, b_set: &'a BTreeSet) -> Intersection<'a, T> { + let a_min = a_set.iter().next().unwrap(); + let b_min = b_set.iter().next().unwrap(); + let a_range = match Ord::cmp(a_min, b_min) { + Less => a_set.range(b_min..), + Equal => a_set.range(..), + Greater => a_set.range(..), + }; + Intersection { + a: a_range, + b: IntersectionOther::Search(b_set), + max_size: a_set.len(), } } @@ -1213,21 +1224,3 @@ impl<'a, T: Ord> Iterator for Union<'a, T> { #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for Union<'_, T> {} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_are_proportionate_for_intersection() { - assert!(are_proportionate_for_intersection(0, 0)); - assert!(are_proportionate_for_intersection(0, 127)); - assert!(!are_proportionate_for_intersection(0, 128)); - assert!(are_proportionate_for_intersection(1, 255)); - assert!(!are_proportionate_for_intersection(1, 256)); - assert!(are_proportionate_for_intersection(127, 0)); - assert!(!are_proportionate_for_intersection(128, 0)); - assert!(are_proportionate_for_intersection(255, 1)); - assert!(!are_proportionate_for_intersection(256, 1)); - } -} From e68dc44783323231e3fa4ab296deda196baab9c5 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Tue, 26 Feb 2019 10:57:36 +0100 Subject: [PATCH 08/13] tidy up long lines --- src/liballoc/benches/btree/set.rs | 70 +++++++++++++-------------- src/liballoc/collections/btree/set.rs | 14 ++++-- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/src/liballoc/benches/btree/set.rs b/src/liballoc/benches/btree/set.rs index ef618e7ee9286..de9721e1e87e5 100644 --- a/src/liballoc/benches/btree/set.rs +++ b/src/liballoc/benches/btree/set.rs @@ -7,7 +7,7 @@ fn random(n1: usize, n2: usize) -> [BTreeSet; 2] { let mut rng = thread_rng(); let mut sets = [BTreeSet::new(), BTreeSet::new()]; for i in 0..2 { - while sets[i].len() < [n1,n2][i] { + while sets[i].len() < [n1, n2][i] { sets[i].insert(rng.gen()); } } @@ -19,7 +19,7 @@ fn random(n1: usize, n2: usize) -> [BTreeSet; 2] { fn stagger(n1: usize, factor: usize) -> [BTreeSet; 2] { let n2 = n1 * factor; let mut sets = [BTreeSet::new(), BTreeSet::new()]; - for i in 0..(n1+n2) { + for i in 0..(n1 + n2) { let b = i % (factor + 1) != 0; sets[b as usize].insert(i as u32); } @@ -50,7 +50,7 @@ fn pos_vs_neg(n1: usize, n2: usize) -> [BTreeSet; 2] { sets } -macro_rules! set_intersection_bench { +macro_rules! intersection_bench { ($name: ident, $sets: expr) => { #[bench] pub fn $name(b: &mut Bencher) { @@ -81,35 +81,35 @@ macro_rules! set_intersection_bench { }; } -set_intersection_bench! {intersect_neg_vs_pos_100, neg_vs_pos(100, 100)} -set_intersection_bench! {intersect_neg_vs_pos_10k, neg_vs_pos(10_000, 10_000)} -set_intersection_bench! {intersect_neg_vs_pos_10_vs_10k, neg_vs_pos(10, 10_000)} -set_intersection_bench! {intersect_neg_vs_pos_10k_vs_10, neg_vs_pos(10_000, 10)} -set_intersection_bench! {intersect_pos_vs_neg_100, pos_vs_neg(100, 100)} -set_intersection_bench! {intersect_pos_vs_neg_10k, pos_vs_neg(10_000, 10_000)} -set_intersection_bench! {intersect_pos_vs_neg_10_vs_10k, pos_vs_neg(10, 10_000)} -set_intersection_bench! {intersect_pos_vs_neg_10k_vs_10, pos_vs_neg(10_000, 10)} -set_intersection_bench! {intersect_random_100, random(100, 100)} -set_intersection_bench! {intersect_random_10k, random(10_000, 10_000)} -set_intersection_bench! {intersect_random_10_vs_10k, random(10, 10_000)} -set_intersection_bench! {intersect_random_10k_vs_10, random(10_000, 10)} -set_intersection_bench! {intersect_stagger_10k, stagger(10_000, 1)} -set_intersection_bench! {intersect_stagger_100, stagger(100, 1)} -set_intersection_bench! {intersect_stagger_100_df1, stagger(100, 1 << 1)} -set_intersection_bench! {intersect_stagger_100_df1_stitch, stagger(100, 1 << 1), intersection_stitch} -set_intersection_bench! {intersect_stagger_100_df1_search, stagger(100, 1 << 1), intersection_search} -set_intersection_bench! {intersect_stagger_100_df2, stagger(100, 1 << 2)} -set_intersection_bench! {intersect_stagger_100_df2_stitch, stagger(100, 1 << 2), intersection_stitch} -set_intersection_bench! {intersect_stagger_100_df2_search, stagger(100, 1 << 2), intersection_search} -set_intersection_bench! {intersect_stagger_100_df3, stagger(100, 1 << 3)} -set_intersection_bench! {intersect_stagger_100_df3_stitch, stagger(100, 1 << 3), intersection_stitch} -set_intersection_bench! {intersect_stagger_100_df3_search, stagger(100, 1 << 3), intersection_search} -set_intersection_bench! {intersect_stagger_100_df4, stagger(100, 1 << 4)} -set_intersection_bench! {intersect_stagger_100_df4_stitch, stagger(100, 1 << 4), intersection_stitch} -set_intersection_bench! {intersect_stagger_100_df4_search, stagger(100, 1 << 4), intersection_search} -set_intersection_bench! {intersect_stagger_100_df5, stagger(100, 1 << 5)} -set_intersection_bench! {intersect_stagger_100_df5_stitch, stagger(100, 1 << 5), intersection_stitch} -set_intersection_bench! {intersect_stagger_100_df5_search, stagger(100, 1 << 5), intersection_search} -set_intersection_bench! {intersect_stagger_100_df6, stagger(100, 1 << 6)} -set_intersection_bench! {intersect_stagger_100_df6_stitch, stagger(100, 1 << 6), intersection_stitch} -set_intersection_bench! {intersect_stagger_100_df6_search, stagger(100, 1 << 6), intersection_search} +intersection_bench! {intersect_neg_vs_pos_100, neg_vs_pos(100, 100)} +intersection_bench! {intersect_neg_vs_pos_10k, neg_vs_pos(10_000, 10_000)} +intersection_bench! {intersect_neg_vs_pos_10_vs_10k, neg_vs_pos(10, 10_000)} +intersection_bench! {intersect_neg_vs_pos_10k_vs_10, neg_vs_pos(10_000, 10)} +intersection_bench! {intersect_pos_vs_neg_100, pos_vs_neg(100, 100)} +intersection_bench! {intersect_pos_vs_neg_10k, pos_vs_neg(10_000, 10_000)} +intersection_bench! {intersect_pos_vs_neg_10_vs_10k, pos_vs_neg(10, 10_000)} +intersection_bench! {intersect_pos_vs_neg_10k_vs_10, pos_vs_neg(10_000, 10)} +intersection_bench! {intersect_random_100, random(100, 100)} +intersection_bench! {intersect_random_10k, random(10_000, 10_000)} +intersection_bench! {intersect_random_10_vs_10k, random(10, 10_000)} +intersection_bench! {intersect_random_10k_vs_10, random(10_000, 10)} +intersection_bench! {intersect_stagger_10k, stagger(10_000, 1)} +intersection_bench! {intersect_stagger_100, stagger(100, 1)} +intersection_bench! {intersect_stagger_100_df1, stagger(100, 1 << 1)} +intersection_bench! {intersect_stagger_100_df1_stitch, stagger(100, 1 << 1), intersection_stitch} +intersection_bench! {intersect_stagger_100_df1_search, stagger(100, 1 << 1), intersection_search} +intersection_bench! {intersect_stagger_100_df2, stagger(100, 1 << 2)} +intersection_bench! {intersect_stagger_100_df2_stitch, stagger(100, 1 << 2), intersection_stitch} +intersection_bench! {intersect_stagger_100_df2_search, stagger(100, 1 << 2), intersection_search} +intersection_bench! {intersect_stagger_100_df3, stagger(100, 1 << 3)} +intersection_bench! {intersect_stagger_100_df3_stitch, stagger(100, 1 << 3), intersection_stitch} +intersection_bench! {intersect_stagger_100_df3_search, stagger(100, 1 << 3), intersection_search} +intersection_bench! {intersect_stagger_100_df4, stagger(100, 1 << 4)} +intersection_bench! {intersect_stagger_100_df4_stitch, stagger(100, 1 << 4), intersection_stitch} +intersection_bench! {intersect_stagger_100_df4_search, stagger(100, 1 << 4), intersection_search} +intersection_bench! {intersect_stagger_100_df5, stagger(100, 1 << 5)} +intersection_bench! {intersect_stagger_100_df5_stitch, stagger(100, 1 << 5), intersection_stitch} +intersection_bench! {intersect_stagger_100_df5_search, stagger(100, 1 << 5), intersection_search} +intersection_bench! {intersect_stagger_100_df6, stagger(100, 1 << 6)} +intersection_bench! {intersect_stagger_100_df6_stitch, stagger(100, 1 << 6), intersection_stitch} +intersection_bench! {intersect_stagger_100_df6_search, stagger(100, 1 << 6), intersection_search} diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 48d108227a1a5..71b105321392c 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -370,8 +370,11 @@ impl BTreeSet { } } #[doc(hidden)] - #[unstable(feature = "benches_btree_set", reason = "allow benchmarking for pull #58577", issue = "0")] - pub fn intersection_stitch<'a>(a_set: &'a BTreeSet, b_set: &'a BTreeSet) -> Intersection<'a, T> { + #[unstable(feature = "benches_btree_set", reason = "benchmarking for pull #58577", issue = "0")] + pub fn intersection_stitch<'a>( + a_set: &'a BTreeSet, + b_set: &'a BTreeSet, + ) -> Intersection<'a, T> { let a_min = a_set.iter().next().unwrap(); let b_min = b_set.iter().next().unwrap(); let (a_range, b_range) = match Ord::cmp(a_min, b_min) { @@ -386,8 +389,11 @@ impl BTreeSet { } } #[doc(hidden)] - #[unstable(feature = "benches_btree_set", reason = "allow benchmarking for pull #58577", issue = "0")] - pub fn intersection_search<'a>(a_set: &'a BTreeSet, b_set: &'a BTreeSet) -> Intersection<'a, T> { + #[unstable(feature = "benches_btree_set", reason = "benchmarking for pull #58577", issue = "0")] + pub fn intersection_search<'a>( + a_set: &'a BTreeSet, + b_set: &'a BTreeSet, + ) -> Intersection<'a, T> { let a_min = a_set.iter().next().unwrap(); let b_min = b_set.iter().next().unwrap(); let a_range = match Ord::cmp(a_min, b_min) { From 5559d11b9654b268f51017c45c8c6548b9259730 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Tue, 26 Feb 2019 13:47:04 +0100 Subject: [PATCH 09/13] BTreeSet intersection: simple optimization for singleton; dissolve function because its outcome is reported by the benchmarks --- src/liballoc/benches/btree/set.rs | 17 ++++---- src/liballoc/collections/btree/set.rs | 58 +++++++++++---------------- 2 files changed, 33 insertions(+), 42 deletions(-) diff --git a/src/liballoc/benches/btree/set.rs b/src/liballoc/benches/btree/set.rs index de9721e1e87e5..643b9d8149f26 100644 --- a/src/liballoc/benches/btree/set.rs +++ b/src/liballoc/benches/btree/set.rs @@ -93,23 +93,26 @@ intersection_bench! {intersect_random_100, random(100, 100)} intersection_bench! {intersect_random_10k, random(10_000, 10_000)} intersection_bench! {intersect_random_10_vs_10k, random(10, 10_000)} intersection_bench! {intersect_random_10k_vs_10, random(10_000, 10)} -intersection_bench! {intersect_stagger_10k, stagger(10_000, 1)} +intersection_bench! {intersect_stagger_1_actual, stagger(1, 1)} +intersection_bench! {intersect_stagger_1_stitch, stagger(1, 1), intersection_stitch} +intersection_bench! {intersect_stagger_1_search, stagger(1, 1), intersection_search} intersection_bench! {intersect_stagger_100, stagger(100, 1)} -intersection_bench! {intersect_stagger_100_df1, stagger(100, 1 << 1)} +intersection_bench! {intersect_stagger_10k, stagger(10_000, 1)} +intersection_bench! {intersect_stagger_100_df1_actual, stagger(100, 1 << 1)} intersection_bench! {intersect_stagger_100_df1_stitch, stagger(100, 1 << 1), intersection_stitch} intersection_bench! {intersect_stagger_100_df1_search, stagger(100, 1 << 1), intersection_search} -intersection_bench! {intersect_stagger_100_df2, stagger(100, 1 << 2)} +intersection_bench! {intersect_stagger_100_df2_actual, stagger(100, 1 << 2)} intersection_bench! {intersect_stagger_100_df2_stitch, stagger(100, 1 << 2), intersection_stitch} intersection_bench! {intersect_stagger_100_df2_search, stagger(100, 1 << 2), intersection_search} -intersection_bench! {intersect_stagger_100_df3, stagger(100, 1 << 3)} +intersection_bench! {intersect_stagger_100_df3_actual, stagger(100, 1 << 3)} intersection_bench! {intersect_stagger_100_df3_stitch, stagger(100, 1 << 3), intersection_stitch} intersection_bench! {intersect_stagger_100_df3_search, stagger(100, 1 << 3), intersection_search} -intersection_bench! {intersect_stagger_100_df4, stagger(100, 1 << 4)} +intersection_bench! {intersect_stagger_100_df4_actual, stagger(100, 1 << 4)} intersection_bench! {intersect_stagger_100_df4_stitch, stagger(100, 1 << 4), intersection_stitch} intersection_bench! {intersect_stagger_100_df4_search, stagger(100, 1 << 4), intersection_search} -intersection_bench! {intersect_stagger_100_df5, stagger(100, 1 << 5)} +intersection_bench! {intersect_stagger_100_df5_actual, stagger(100, 1 << 5)} intersection_bench! {intersect_stagger_100_df5_stitch, stagger(100, 1 << 5), intersection_stitch} intersection_bench! {intersect_stagger_100_df5_search, stagger(100, 1 << 5), intersection_search} -intersection_bench! {intersect_stagger_100_df6, stagger(100, 1 << 6)} +intersection_bench! {intersect_stagger_100_df6_actual, stagger(100, 1 << 6)} intersection_bench! {intersect_stagger_100_df6_stitch, stagger(100, 1 << 6), intersection_stitch} intersection_bench! {intersect_stagger_100_df6_search, stagger(100, 1 << 6), intersection_search} diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 71b105321392c..6ee020b482c7a 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -155,24 +155,6 @@ impl fmt::Debug for SymmetricDifference<'_, T> { } } -/// Whether the sizes of two sets are roughly the same order of magnitude. -/// -/// If they are, or if either set is empty, then their intersection -/// is efficiently calculated by iterating the common range of both sets jointly. -/// If they aren't, then it is more scalable to iterate over the small set -/// and find matches in the large set. -const fn are_proportionate_for_intersection(small: usize, large: usize) -> bool { - (large >> 4) < small -} -struct _AssertProportionateForIntersection { - a: [(); are_proportionate_for_intersection(1, 15) as usize - 1], - b: [(); !are_proportionate_for_intersection(1, 16) as usize - 1], - c: [(); are_proportionate_for_intersection(2, 31) as usize - 1], - d: [(); !are_proportionate_for_intersection(2, 32) as usize - 1], - e: [(); are_proportionate_for_intersection(3, 47) as usize - 1], - f: [(); !are_proportionate_for_intersection(3, 48) as usize - 1], -} - /// A lazy iterator producing elements in the intersection of `BTreeSet`s. /// /// This `struct` is created by the [`intersection`] method on [`BTreeSet`]. @@ -357,53 +339,59 @@ impl BTreeSet { } else { (other, self) }; - if a_set.is_empty() { + if a_set.len() <= 1 { + // At least one set is empty or a singleton, so determining + // a common range doesn't work or is wasteful. Intersection { a: a_set.range(..), b: IntersectionOther::Search(b_set), - max_size: 0, + max_size: a_set.len(), } - } else if are_proportionate_for_intersection(a_set.len(), b_set.len()) { - Self::intersection_stitch(a_set, b_set) - } else { + } else if a_set.len() < b_set.len() / 16 { + // Large set is much larger, so it's faster to iterate the small set + // and find matches in the large set. Self::intersection_search(a_set, b_set) + } else { + // Both sets are roughly of similar size, so it's efficient + // to iterate both over the common range. + Self::intersection_stitch(a_set, b_set) } } #[doc(hidden)] #[unstable(feature = "benches_btree_set", reason = "benchmarking for pull #58577", issue = "0")] - pub fn intersection_stitch<'a>( + pub fn intersection_search<'a>( a_set: &'a BTreeSet, b_set: &'a BTreeSet, ) -> Intersection<'a, T> { let a_min = a_set.iter().next().unwrap(); let b_min = b_set.iter().next().unwrap(); - let (a_range, b_range) = match Ord::cmp(a_min, b_min) { - Less => (a_set.range(b_min..), b_set.range(..)), - Equal => (a_set.range(..), b_set.range(..)), - Greater => (a_set.range(..), b_set.range(a_min..)), + let a_range = match Ord::cmp(a_min, b_min) { + Less => a_set.range(b_min..), + Equal => a_set.range(..), + Greater => a_set.range(..), }; Intersection { a: a_range, - b: IntersectionOther::Stitch(b_range), + b: IntersectionOther::Search(b_set), max_size: a_set.len(), } } #[doc(hidden)] #[unstable(feature = "benches_btree_set", reason = "benchmarking for pull #58577", issue = "0")] - pub fn intersection_search<'a>( + pub fn intersection_stitch<'a>( a_set: &'a BTreeSet, b_set: &'a BTreeSet, ) -> Intersection<'a, T> { let a_min = a_set.iter().next().unwrap(); let b_min = b_set.iter().next().unwrap(); - let a_range = match Ord::cmp(a_min, b_min) { - Less => a_set.range(b_min..), - Equal => a_set.range(..), - Greater => a_set.range(..), + let (a_range, b_range) = match Ord::cmp(a_min, b_min) { + Less => (a_set.range(b_min..), b_set.range(..)), + Equal => (a_set.range(..), b_set.range(..)), + Greater => (a_set.range(..), b_set.range(a_min..)), }; Intersection { a: a_range, - b: IntersectionOther::Search(b_set), + b: IntersectionOther::Stitch(b_range), max_size: a_set.len(), } } From a0d8f241bc1f60a59631f7b08a3d3a2dcb7c0f6d Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Wed, 27 Feb 2019 12:29:07 +0100 Subject: [PATCH 10/13] BTreeSet intersection: adapt to recent changes merged in --- src/liballoc/collections/btree/set.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 6ee020b482c7a..0cabb8a0f3782 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -1133,8 +1133,8 @@ impl<'a, T: Ord> Iterator for SymmetricDifference<'a, T> { #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for SymmetricDifference<'_, T> {} -impl<'a, T> Clone for IntersectionOther<'a, T> { - fn clone(&self) -> IntersectionOther<'a, T> { +impl Clone for IntersectionOther<'_, T> { + fn clone(&self) -> Self { match &self { IntersectionOther::Stitch(range) => IntersectionOther::Stitch(range.clone()), IntersectionOther::Search(set) => IntersectionOther::Search(set), From 0d12ecf086c501083073d3e09cec33bada8cabd4 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Wed, 27 Feb 2019 23:38:28 +0100 Subject: [PATCH 11/13] BTreeSet benchmarks: rename and reorder them to match with ascii-ordered output; add a few more --- src/liballoc/benches/btree/set.rs | 75 +++++++++++++++++-------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/src/liballoc/benches/btree/set.rs b/src/liballoc/benches/btree/set.rs index 643b9d8149f26..5d0aaca8648c8 100644 --- a/src/liballoc/benches/btree/set.rs +++ b/src/liballoc/benches/btree/set.rs @@ -81,38 +81,47 @@ macro_rules! intersection_bench { }; } -intersection_bench! {intersect_neg_vs_pos_100, neg_vs_pos(100, 100)} -intersection_bench! {intersect_neg_vs_pos_10k, neg_vs_pos(10_000, 10_000)} -intersection_bench! {intersect_neg_vs_pos_10_vs_10k, neg_vs_pos(10, 10_000)} -intersection_bench! {intersect_neg_vs_pos_10k_vs_10, neg_vs_pos(10_000, 10)} -intersection_bench! {intersect_pos_vs_neg_100, pos_vs_neg(100, 100)} -intersection_bench! {intersect_pos_vs_neg_10k, pos_vs_neg(10_000, 10_000)} -intersection_bench! {intersect_pos_vs_neg_10_vs_10k, pos_vs_neg(10, 10_000)} -intersection_bench! {intersect_pos_vs_neg_10k_vs_10, pos_vs_neg(10_000, 10)} -intersection_bench! {intersect_random_100, random(100, 100)} -intersection_bench! {intersect_random_10k, random(10_000, 10_000)} -intersection_bench! {intersect_random_10_vs_10k, random(10, 10_000)} -intersection_bench! {intersect_random_10k_vs_10, random(10_000, 10)} +intersection_bench! {intersect_100_neg_vs_100_pos, neg_vs_pos(100, 100)} +intersection_bench! {intersect_100_neg_vs_10k_pos, neg_vs_pos(100, 10_000)} +intersection_bench! {intersect_100_pos_vs_100_neg, pos_vs_neg(100, 100)} +intersection_bench! {intersect_100_pos_vs_10k_neg, pos_vs_neg(100, 10_000)} +intersection_bench! {intersect_10k_neg_vs_100_pos, neg_vs_pos(10_000, 100)} +intersection_bench! {intersect_10k_neg_vs_10k_pos, neg_vs_pos(10_000, 10_000)} +intersection_bench! {intersect_10k_pos_vs_100_neg, pos_vs_neg(10_000, 100)} +intersection_bench! {intersect_10k_pos_vs_10k_neg, pos_vs_neg(10_000, 10_000)} +intersection_bench! {intersect_random_100_vs_100_actual,random(100, 100)} +intersection_bench! {intersect_random_100_vs_100_search,random(100, 100), intersection_search} +intersection_bench! {intersect_random_100_vs_100_stitch,random(100, 100), intersection_stitch} +intersection_bench! {intersect_random_100_vs_10k_actual,random(100, 10_000)} +intersection_bench! {intersect_random_100_vs_10k_search,random(100, 10_000), intersection_search} +intersection_bench! {intersect_random_100_vs_10k_stitch,random(100, 10_000), intersection_stitch} +intersection_bench! {intersect_random_10k_vs_10k_actual,random(10_000, 10_000)} +intersection_bench! {intersect_random_10k_vs_10k_search,random(10_000, 10_000)} +intersection_bench! {intersect_random_10k_vs_10k_stitch,random(10_000, 10_000)} +intersection_bench! {intersect_stagger_100_actual, stagger(100, 1)} +intersection_bench! {intersect_stagger_100_search, stagger(100, 1), intersection_search} +intersection_bench! {intersect_stagger_100_stitch, stagger(100, 1), intersection_stitch} +intersection_bench! {intersect_stagger_10k_actual, stagger(10_000, 1)} +intersection_bench! {intersect_stagger_10k_search, stagger(10_000, 1), intersection_search} +intersection_bench! {intersect_stagger_10k_stitch, stagger(10_000, 1), intersection_stitch} intersection_bench! {intersect_stagger_1_actual, stagger(1, 1)} -intersection_bench! {intersect_stagger_1_stitch, stagger(1, 1), intersection_stitch} intersection_bench! {intersect_stagger_1_search, stagger(1, 1), intersection_search} -intersection_bench! {intersect_stagger_100, stagger(100, 1)} -intersection_bench! {intersect_stagger_10k, stagger(10_000, 1)} -intersection_bench! {intersect_stagger_100_df1_actual, stagger(100, 1 << 1)} -intersection_bench! {intersect_stagger_100_df1_stitch, stagger(100, 1 << 1), intersection_stitch} -intersection_bench! {intersect_stagger_100_df1_search, stagger(100, 1 << 1), intersection_search} -intersection_bench! {intersect_stagger_100_df2_actual, stagger(100, 1 << 2)} -intersection_bench! {intersect_stagger_100_df2_stitch, stagger(100, 1 << 2), intersection_stitch} -intersection_bench! {intersect_stagger_100_df2_search, stagger(100, 1 << 2), intersection_search} -intersection_bench! {intersect_stagger_100_df3_actual, stagger(100, 1 << 3)} -intersection_bench! {intersect_stagger_100_df3_stitch, stagger(100, 1 << 3), intersection_stitch} -intersection_bench! {intersect_stagger_100_df3_search, stagger(100, 1 << 3), intersection_search} -intersection_bench! {intersect_stagger_100_df4_actual, stagger(100, 1 << 4)} -intersection_bench! {intersect_stagger_100_df4_stitch, stagger(100, 1 << 4), intersection_stitch} -intersection_bench! {intersect_stagger_100_df4_search, stagger(100, 1 << 4), intersection_search} -intersection_bench! {intersect_stagger_100_df5_actual, stagger(100, 1 << 5)} -intersection_bench! {intersect_stagger_100_df5_stitch, stagger(100, 1 << 5), intersection_stitch} -intersection_bench! {intersect_stagger_100_df5_search, stagger(100, 1 << 5), intersection_search} -intersection_bench! {intersect_stagger_100_df6_actual, stagger(100, 1 << 6)} -intersection_bench! {intersect_stagger_100_df6_stitch, stagger(100, 1 << 6), intersection_stitch} -intersection_bench! {intersect_stagger_100_df6_search, stagger(100, 1 << 6), intersection_search} +intersection_bench! {intersect_stagger_1_stitch, stagger(1, 1), intersection_stitch} +intersection_bench! {intersect_stagger_diff1_actual, stagger(100, 1 << 1)} +intersection_bench! {intersect_stagger_diff1_search, stagger(100, 1 << 1), intersection_search} +intersection_bench! {intersect_stagger_diff1_stitch, stagger(100, 1 << 1), intersection_stitch} +intersection_bench! {intersect_stagger_diff2_actual, stagger(100, 1 << 2)} +intersection_bench! {intersect_stagger_diff2_search, stagger(100, 1 << 2), intersection_search} +intersection_bench! {intersect_stagger_diff2_stitch, stagger(100, 1 << 2), intersection_stitch} +intersection_bench! {intersect_stagger_diff3_actual, stagger(100, 1 << 3)} +intersection_bench! {intersect_stagger_diff3_search, stagger(100, 1 << 3), intersection_search} +intersection_bench! {intersect_stagger_diff3_stitch, stagger(100, 1 << 3), intersection_stitch} +intersection_bench! {intersect_stagger_diff4_actual, stagger(100, 1 << 4)} +intersection_bench! {intersect_stagger_diff4_search, stagger(100, 1 << 4), intersection_search} +intersection_bench! {intersect_stagger_diff4_stitch, stagger(100, 1 << 4), intersection_stitch} +intersection_bench! {intersect_stagger_diff5_actual, stagger(100, 1 << 5)} +intersection_bench! {intersect_stagger_diff5_search, stagger(100, 1 << 5), intersection_search} +intersection_bench! {intersect_stagger_diff5_stitch, stagger(100, 1 << 5), intersection_stitch} +intersection_bench! {intersect_stagger_diff6_actual, stagger(100, 1 << 6)} +intersection_bench! {intersect_stagger_diff6_search, stagger(100, 1 << 6), intersection_search} +intersection_bench! {intersect_stagger_diff6_stitch, stagger(100, 1 << 6), intersection_stitch} From 8b57fa82203dedd97d8083891f62188d80951629 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Thu, 28 Feb 2019 12:52:02 +0100 Subject: [PATCH 12/13] BTreeSet intersection: simplify range calculation; swap order again --- src/liballoc/collections/btree/set.rs | 40 ++++++++++----------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/src/liballoc/collections/btree/set.rs b/src/liballoc/collections/btree/set.rs index 0cabb8a0f3782..3f1f185129904 100644 --- a/src/liballoc/collections/btree/set.rs +++ b/src/liballoc/collections/btree/set.rs @@ -341,57 +341,47 @@ impl BTreeSet { }; if a_set.len() <= 1 { // At least one set is empty or a singleton, so determining - // a common range doesn't work or is wasteful. + // a common range is either impossible or wasteful. Intersection { a: a_set.range(..), b: IntersectionOther::Search(b_set), max_size: a_set.len(), } - } else if a_set.len() < b_set.len() / 16 { - // Large set is much larger, so it's faster to iterate the small set - // and find matches in the large set. - Self::intersection_search(a_set, b_set) - } else { - // Both sets are roughly of similar size, so it's efficient - // to iterate both over the common range. + } else if a_set.len() >= b_set.len() / 16 { + // Both sets are roughly of similar size, iterate both. Self::intersection_stitch(a_set, b_set) + } else { + // Iterate small set only and find matches in large set. + Self::intersection_search(a_set, b_set) } } #[doc(hidden)] - #[unstable(feature = "benches_btree_set", reason = "benchmarking for pull #58577", issue = "0")] - pub fn intersection_search<'a>( + #[unstable(feature = "benches_btree_set", reason = "benchmarks for pull #58577", issue = "0")] + pub fn intersection_stitch<'a>( a_set: &'a BTreeSet, b_set: &'a BTreeSet, ) -> Intersection<'a, T> { let a_min = a_set.iter().next().unwrap(); let b_min = b_set.iter().next().unwrap(); - let a_range = match Ord::cmp(a_min, b_min) { - Less => a_set.range(b_min..), - Equal => a_set.range(..), - Greater => a_set.range(..), - }; + let a_range = a_set.range(b_min..); + let b_range = b_set.range(a_min..); Intersection { a: a_range, - b: IntersectionOther::Search(b_set), + b: IntersectionOther::Stitch(b_range), max_size: a_set.len(), } } #[doc(hidden)] - #[unstable(feature = "benches_btree_set", reason = "benchmarking for pull #58577", issue = "0")] - pub fn intersection_stitch<'a>( + #[unstable(feature = "benches_btree_set", reason = "benchmarks for pull #58577", issue = "0")] + pub fn intersection_search<'a>( a_set: &'a BTreeSet, b_set: &'a BTreeSet, ) -> Intersection<'a, T> { - let a_min = a_set.iter().next().unwrap(); let b_min = b_set.iter().next().unwrap(); - let (a_range, b_range) = match Ord::cmp(a_min, b_min) { - Less => (a_set.range(b_min..), b_set.range(..)), - Equal => (a_set.range(..), b_set.range(..)), - Greater => (a_set.range(..), b_set.range(a_min..)), - }; + let a_range = a_set.range(b_min..); Intersection { a: a_range, - b: IntersectionOther::Stitch(b_range), + b: IntersectionOther::Search(b_set), max_size: a_set.len(), } } From 3e90b15127d4174c1cdfd2f0f39f99215bc99f68 Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Sun, 10 Mar 2019 23:26:54 +0100 Subject: [PATCH 13/13] fix benchmark delivering fake news --- src/liballoc/benches/btree/set.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/liballoc/benches/btree/set.rs b/src/liballoc/benches/btree/set.rs index 5d0aaca8648c8..0875b3a5fedfe 100644 --- a/src/liballoc/benches/btree/set.rs +++ b/src/liballoc/benches/btree/set.rs @@ -96,8 +96,8 @@ intersection_bench! {intersect_random_100_vs_10k_actual,random(100, 10_000)} intersection_bench! {intersect_random_100_vs_10k_search,random(100, 10_000), intersection_search} intersection_bench! {intersect_random_100_vs_10k_stitch,random(100, 10_000), intersection_stitch} intersection_bench! {intersect_random_10k_vs_10k_actual,random(10_000, 10_000)} -intersection_bench! {intersect_random_10k_vs_10k_search,random(10_000, 10_000)} -intersection_bench! {intersect_random_10k_vs_10k_stitch,random(10_000, 10_000)} +intersection_bench! {intersect_random_10k_vs_10k_search,random(10_000, 10_000), intersection_search} +intersection_bench! {intersect_random_10k_vs_10k_stitch,random(10_000, 10_000), intersection_stitch} intersection_bench! {intersect_stagger_100_actual, stagger(100, 1)} intersection_bench! {intersect_stagger_100_search, stagger(100, 1), intersection_search} intersection_bench! {intersect_stagger_100_stitch, stagger(100, 1), intersection_stitch}