88// option. This file may not be copied, modified, or distributed
99// except according to those terms.
1010
11+ use array_vec:: ArrayVec ;
1112use std:: borrow:: { Borrow , BorrowMut , ToOwned } ;
1213use std:: fmt;
1314use std:: iter;
@@ -25,6 +26,8 @@ use rustc_serialize;
2526///
2627/// In other words, `T` is the type used to index into the bitvector
2728/// this type uses to represent the set of objects it holds.
29+ ///
30+ /// The representation is dense, using one bit per possible element.
2831#[ derive( Eq , PartialEq ) ]
2932pub struct IdxSetBuf < T : Idx > {
3033 _pd : PhantomData < fn ( & T ) > ,
@@ -90,6 +93,8 @@ impl<T: Idx> ToOwned for IdxSet<T> {
9093 }
9194}
9295
96+ const BITS_PER_WORD : usize = mem:: size_of :: < Word > ( ) * 8 ;
97+
9398impl < T : Idx > fmt:: Debug for IdxSetBuf < T > {
9499 fn fmt ( & self , w : & mut fmt:: Formatter ) -> fmt:: Result {
95100 w. debug_list ( )
@@ -108,8 +113,7 @@ impl<T: Idx> fmt::Debug for IdxSet<T> {
108113
109114impl < T : Idx > IdxSetBuf < T > {
110115 fn new ( init : Word , universe_size : usize ) -> Self {
111- let bits_per_word = mem:: size_of :: < Word > ( ) * 8 ;
112- let num_words = ( universe_size + ( bits_per_word - 1 ) ) / bits_per_word;
116+ let num_words = ( universe_size + ( BITS_PER_WORD - 1 ) ) / BITS_PER_WORD ;
113117 IdxSetBuf {
114118 _pd : Default :: default ( ) ,
115119 bits : vec ! [ init; num_words] ,
@@ -160,6 +164,16 @@ impl<T: Idx> IdxSet<T> {
160164 }
161165 }
162166
167+ /// Duplicates as a hybrid set.
168+ pub fn to_hybrid ( & self ) -> HybridIdxSetBuf < T > {
169+ // This universe_size may be slightly larger than the one specified
170+ // upon creation, due to rounding up to a whole word. That's ok.
171+ let universe_size = self . bits . len ( ) * BITS_PER_WORD ;
172+
173+ // Note: we currently don't bother trying to make a Sparse set.
174+ HybridIdxSetBuf :: Dense ( self . to_owned ( ) , universe_size)
175+ }
176+
163177 /// Removes all elements
164178 pub fn clear ( & mut self ) {
165179 for b in & mut self . bits {
@@ -177,21 +191,19 @@ impl<T: Idx> IdxSet<T> {
177191
178192 /// Clear all elements above `universe_size`.
179193 fn trim_to ( & mut self , universe_size : usize ) {
180- let word_bits = mem:: size_of :: < Word > ( ) * 8 ;
181-
182194 // `trim_block` is the first block where some bits have
183195 // to be cleared.
184- let trim_block = universe_size / word_bits ;
196+ let trim_block = universe_size / BITS_PER_WORD ;
185197
186198 // all the blocks above it have to be completely cleared.
187199 if trim_block < self . bits . len ( ) {
188200 for b in & mut self . bits [ trim_block+1 ..] {
189201 * b = 0 ;
190202 }
191203
192- // at that block, the `universe_size % word_bits ` lsbs
204+ // at that block, the `universe_size % BITS_PER_WORD ` lsbs
193205 // should remain.
194- let remaining_bits = universe_size % word_bits ;
206+ let remaining_bits = universe_size % BITS_PER_WORD ;
195207 let mask = ( 1 <<remaining_bits) -1 ;
196208 self . bits [ trim_block] &= mask;
197209 }
@@ -242,12 +254,46 @@ impl<T: Idx> IdxSet<T> {
242254 bitwise ( self . words_mut ( ) , other. words ( ) , & Union )
243255 }
244256
257+ /// Like `union()`, but takes a `SparseIdxSetBuf` argument.
258+ fn union_sparse ( & mut self , other : & SparseIdxSetBuf < T > ) -> bool {
259+ let mut changed = false ;
260+ for elem in other. iter ( ) {
261+ changed |= self . add ( & elem) ;
262+ }
263+ changed
264+ }
265+
266+ /// Like `union()`, but takes a `HybridIdxSetBuf` argument.
267+ pub fn union_hybrid ( & mut self , other : & HybridIdxSetBuf < T > ) -> bool {
268+ match other {
269+ HybridIdxSetBuf :: Sparse ( sparse, _) => self . union_sparse ( sparse) ,
270+ HybridIdxSetBuf :: Dense ( dense, _) => self . union ( dense) ,
271+ }
272+ }
273+
245274 /// Set `self = self - other` and return true if `self` changed.
246275 /// (i.e., if any bits were removed).
247276 pub fn subtract ( & mut self , other : & IdxSet < T > ) -> bool {
248277 bitwise ( self . words_mut ( ) , other. words ( ) , & Subtract )
249278 }
250279
280+ /// Like `subtract()`, but takes a `SparseIdxSetBuf` argument.
281+ fn subtract_sparse ( & mut self , other : & SparseIdxSetBuf < T > ) -> bool {
282+ let mut changed = false ;
283+ for elem in other. iter ( ) {
284+ changed |= self . remove ( & elem) ;
285+ }
286+ changed
287+ }
288+
289+ /// Like `subtract()`, but takes a `HybridIdxSetBuf` argument.
290+ pub fn subtract_hybrid ( & mut self , other : & HybridIdxSetBuf < T > ) -> bool {
291+ match other {
292+ HybridIdxSetBuf :: Sparse ( sparse, _) => self . subtract_sparse ( sparse) ,
293+ HybridIdxSetBuf :: Dense ( dense, _) => self . subtract ( dense) ,
294+ }
295+ }
296+
251297 /// Set `self = self & other` and return true if `self` changed.
252298 /// (i.e., if any bits were removed).
253299 pub fn intersect ( & mut self , other : & IdxSet < T > ) -> bool {
@@ -273,19 +319,200 @@ impl<'a, T: Idx> Iterator for Iter<'a, T> {
273319 type Item = T ;
274320
275321 fn next ( & mut self ) -> Option < T > {
276- let word_bits = mem:: size_of :: < Word > ( ) * 8 ;
277322 loop {
278323 if let Some ( ( ref mut word, offset) ) = self . cur {
279324 let bit_pos = word. trailing_zeros ( ) as usize ;
280- if bit_pos != word_bits {
325+ if bit_pos != BITS_PER_WORD {
281326 let bit = 1 << bit_pos;
282327 * word ^= bit;
283328 return Some ( T :: new ( bit_pos + offset) )
284329 }
285330 }
286331
287332 let ( i, word) = self . iter . next ( ) ?;
288- self . cur = Some ( ( * word, word_bits * i) ) ;
333+ self . cur = Some ( ( * word, BITS_PER_WORD * i) ) ;
334+ }
335+ }
336+ }
337+
338+ const SPARSE_MAX : usize = 8 ;
339+
340+ /// A sparse index set with a maximum of SPARSE_MAX elements. Used by
341+ /// HybridIdxSetBuf; do not use directly.
342+ ///
343+ /// The elements are stored as an unsorted vector with no duplicates.
344+ #[ derive( Clone , Debug ) ]
345+ pub struct SparseIdxSetBuf < T : Idx > ( ArrayVec < [ T ; SPARSE_MAX ] > ) ;
346+
347+ impl < T : Idx > SparseIdxSetBuf < T > {
348+ fn new ( ) -> Self {
349+ SparseIdxSetBuf ( ArrayVec :: new ( ) )
350+ }
351+
352+ fn len ( & self ) -> usize {
353+ self . 0 . len ( )
354+ }
355+
356+ fn contains ( & self , elem : & T ) -> bool {
357+ self . 0 . contains ( elem)
358+ }
359+
360+ fn add ( & mut self , elem : & T ) -> bool {
361+ // Ensure there are no duplicates.
362+ if self . 0 . contains ( elem) {
363+ false
364+ } else {
365+ self . 0 . push ( * elem) ;
366+ true
367+ }
368+ }
369+
370+ fn remove ( & mut self , elem : & T ) -> bool {
371+ if let Some ( i) = self . 0 . iter ( ) . position ( |e| e == elem) {
372+ // Swap the found element to the end, then pop it.
373+ let len = self . 0 . len ( ) ;
374+ self . 0 . swap ( i, len - 1 ) ;
375+ self . 0 . pop ( ) ;
376+ true
377+ } else {
378+ false
379+ }
380+ }
381+
382+ fn to_dense ( & self , universe_size : usize ) -> IdxSetBuf < T > {
383+ let mut dense = IdxSetBuf :: new_empty ( universe_size) ;
384+ for elem in self . 0 . iter ( ) {
385+ dense. add ( elem) ;
386+ }
387+ dense
388+ }
389+
390+ fn iter ( & self ) -> SparseIter < T > {
391+ SparseIter {
392+ iter : self . 0 . iter ( ) ,
393+ }
394+ }
395+ }
396+
397+ pub struct SparseIter < ' a , T : Idx > {
398+ iter : slice:: Iter < ' a , T > ,
399+ }
400+
401+ impl < ' a , T : Idx > Iterator for SparseIter < ' a , T > {
402+ type Item = T ;
403+
404+ fn next ( & mut self ) -> Option < T > {
405+ self . iter . next ( ) . map ( |e| * e)
406+ }
407+ }
408+
409+ /// Like IdxSetBuf, but with a hybrid representation: sparse when there are few
410+ /// elements in the set, but dense when there are many. It's especially
411+ /// efficient for sets that typically have a small number of elements, but a
412+ /// large `universe_size`, and are cleared frequently.
413+ #[ derive( Clone , Debug ) ]
414+ pub enum HybridIdxSetBuf < T : Idx > {
415+ Sparse ( SparseIdxSetBuf < T > , usize ) ,
416+ Dense ( IdxSetBuf < T > , usize ) ,
417+ }
418+
419+ impl < T : Idx > HybridIdxSetBuf < T > {
420+ pub fn new_empty ( universe_size : usize ) -> Self {
421+ HybridIdxSetBuf :: Sparse ( SparseIdxSetBuf :: new ( ) , universe_size)
422+ }
423+
424+ fn universe_size ( & mut self ) -> usize {
425+ match * self {
426+ HybridIdxSetBuf :: Sparse ( _, size) => size,
427+ HybridIdxSetBuf :: Dense ( _, size) => size,
428+ }
429+ }
430+
431+ pub fn clear ( & mut self ) {
432+ let universe_size = self . universe_size ( ) ;
433+ * self = HybridIdxSetBuf :: new_empty ( universe_size) ;
434+ }
435+
436+ /// Returns true iff set `self` contains `elem`.
437+ pub fn contains ( & self , elem : & T ) -> bool {
438+ match self {
439+ HybridIdxSetBuf :: Sparse ( sparse, _) => sparse. contains ( elem) ,
440+ HybridIdxSetBuf :: Dense ( dense, _) => dense. contains ( elem) ,
441+ }
442+ }
443+
444+ /// Adds `elem` to the set `self`.
445+ pub fn add ( & mut self , elem : & T ) -> bool {
446+ match self {
447+ HybridIdxSetBuf :: Sparse ( sparse, _) if sparse. len ( ) < SPARSE_MAX => {
448+ // The set is sparse and has space for `elem`.
449+ sparse. add ( elem)
450+ }
451+ HybridIdxSetBuf :: Sparse ( sparse, _) if sparse. contains ( elem) => {
452+ // The set is sparse and does not have space for `elem`, but
453+ // that doesn't matter because `elem` is already present.
454+ false
455+ }
456+ HybridIdxSetBuf :: Sparse ( _, _) => {
457+ // The set is sparse and full. Convert to a dense set.
458+ //
459+ // FIXME: This code is awful, but I can't work out how else to
460+ // appease the borrow checker.
461+ let dummy = HybridIdxSetBuf :: Sparse ( SparseIdxSetBuf :: new ( ) , 0 ) ;
462+ match mem:: replace ( self , dummy) {
463+ HybridIdxSetBuf :: Sparse ( sparse, universe_size) => {
464+ let mut dense = sparse. to_dense ( universe_size) ;
465+ let changed = dense. add ( elem) ;
466+ assert ! ( changed) ;
467+ mem:: replace ( self , HybridIdxSetBuf :: Dense ( dense, universe_size) ) ;
468+ changed
469+ }
470+ _ => panic ! ( "impossible" ) ,
471+ }
472+ }
473+
474+ HybridIdxSetBuf :: Dense ( dense, _) => dense. add ( elem) ,
475+ }
476+ }
477+
478+ /// Removes `elem` from the set `self`.
479+ pub fn remove ( & mut self , elem : & T ) -> bool {
480+ // Note: we currently don't bother going from Dense back to Sparse.
481+ match self {
482+ HybridIdxSetBuf :: Sparse ( sparse, _) => sparse. remove ( elem) ,
483+ HybridIdxSetBuf :: Dense ( dense, _) => dense. remove ( elem) ,
484+ }
485+ }
486+
487+ /// Converts to a dense set, consuming itself in the process.
488+ pub fn to_dense ( self ) -> IdxSetBuf < T > {
489+ match self {
490+ HybridIdxSetBuf :: Sparse ( sparse, universe_size) => sparse. to_dense ( universe_size) ,
491+ HybridIdxSetBuf :: Dense ( dense, _) => dense,
492+ }
493+ }
494+
495+ /// Iteration order is unspecified.
496+ pub fn iter ( & self ) -> HybridIter < T > {
497+ match self {
498+ HybridIdxSetBuf :: Sparse ( sparse, _) => HybridIter :: Sparse ( sparse. iter ( ) ) ,
499+ HybridIdxSetBuf :: Dense ( dense, _) => HybridIter :: Dense ( dense. iter ( ) ) ,
500+ }
501+ }
502+ }
503+
504+ pub enum HybridIter < ' a , T : Idx > {
505+ Sparse ( SparseIter < ' a , T > ) ,
506+ Dense ( Iter < ' a , T > ) ,
507+ }
508+
509+ impl < ' a , T : Idx > Iterator for HybridIter < ' a , T > {
510+ type Item = T ;
511+
512+ fn next ( & mut self ) -> Option < T > {
513+ match self {
514+ HybridIter :: Sparse ( sparse) => sparse. next ( ) ,
515+ HybridIter :: Dense ( dense) => dense. next ( ) ,
289516 }
290517 }
291518}
0 commit comments