-
-
Notifications
You must be signed in to change notification settings - Fork 672
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
use group_by in translate_codec_idx_to_original_id
- Loading branch information
Showing
4 changed files
with
324 additions
and
92 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
use std::cell::RefCell; | ||
use std::iter::Peekable; | ||
use std::rc::Rc; | ||
|
||
pub trait GroupByIteratorExtended: Iterator { | ||
/// Return an `Iterator` that groups iterator elements. Consecutive elements that map to the | ||
/// same key are assigned to the same group. | ||
/// | ||
/// The returned Iterator item is `(K, impl Iterator)`, where Iterator are the items of the | ||
/// group. | ||
/// | ||
/// ``` | ||
/// use tantivy_common::GroupByIteratorExtended; | ||
/// | ||
/// // group data into blocks of larger than zero or not. | ||
/// let data: Vec<i32> = vec![1, 3, -2, -2, 1, 0, 1, 2]; | ||
/// // groups: |---->|------>|--------->| | ||
/// | ||
/// let mut data_grouped = Vec::new(); | ||
/// // Note: group is an iterator | ||
/// for (key, group) in data.into_iter().group_by(|val| *val >= 0) { | ||
/// data_grouped.push((key, group.collect())); | ||
/// } | ||
/// assert_eq!(data_grouped, vec![(true, vec![1, 3]), (false, vec![-2, -2]), (true, vec![1, 0, 1, 2])]); | ||
/// ``` | ||
fn group_by<K, F>(self, key: F) -> GroupByIterator<Self, F, K> | ||
where | ||
Self: Sized, | ||
F: FnMut(&Self::Item) -> K, | ||
K: PartialEq + Copy, | ||
Self::Item: Copy, | ||
{ | ||
GroupByIterator::new(self, key) | ||
} | ||
} | ||
impl<I: Iterator> GroupByIteratorExtended for I {} | ||
|
||
pub struct GroupByIterator<I, F, K: Copy> | ||
where | ||
I: Iterator, | ||
F: FnMut(&I::Item) -> K, | ||
{ | ||
// I really would like to avoid the Rc<RefCell>, but the Iterator is shared between | ||
// `GroupByIterator` and `GroupIter`. In practice they are used consecutive and | ||
// `GroupByIter` is finished before calling next on `GroupByIterator`. I'm not sure there | ||
// is a solution with lifetimes for that, because we would need to enforce it in the usage | ||
// somehow. | ||
// | ||
// One potential solution would be to replace the iterator approach with something similar. | ||
inner: Rc<RefCell<GroupByShared<I, F, K>>>, | ||
} | ||
|
||
struct GroupByShared<I, F, K: Copy> | ||
where | ||
I: Iterator, | ||
F: FnMut(&I::Item) -> K, | ||
{ | ||
iter: Peekable<I>, | ||
group_by_fn: F, | ||
} | ||
|
||
impl<I, F, K> GroupByIterator<I, F, K> | ||
where | ||
I: Iterator, | ||
F: FnMut(&I::Item) -> K, | ||
K: Copy, | ||
{ | ||
fn new(inner: I, group_by_fn: F) -> Self { | ||
let inner = GroupByShared { | ||
iter: inner.peekable(), | ||
group_by_fn, | ||
}; | ||
|
||
Self { | ||
inner: Rc::new(RefCell::new(inner)), | ||
} | ||
} | ||
} | ||
|
||
impl<I, F, K> Iterator for GroupByIterator<I, F, K> | ||
where | ||
I: Iterator, | ||
I::Item: Copy, | ||
F: FnMut(&I::Item) -> K, | ||
K: Copy, | ||
{ | ||
type Item = (K, GroupIterator<I, F, K>); | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
let mut inner = self.inner.borrow_mut(); | ||
let value = *inner.iter.peek()?; | ||
let key = (inner.group_by_fn)(&value); | ||
|
||
let inner = self.inner.clone(); | ||
|
||
let group_iter = GroupIterator { | ||
inner, | ||
group_key: key, | ||
}; | ||
Some((key, group_iter)) | ||
} | ||
} | ||
|
||
pub struct GroupIterator<I, F, K: Copy> | ||
where | ||
I: Iterator, | ||
F: FnMut(&I::Item) -> K, | ||
{ | ||
inner: Rc<RefCell<GroupByShared<I, F, K>>>, | ||
group_key: K, | ||
} | ||
|
||
impl<I, F, K: PartialEq + Copy> Iterator for GroupIterator<I, F, K> | ||
where | ||
I: Iterator, | ||
I::Item: Copy, | ||
F: FnMut(&I::Item) -> K, | ||
{ | ||
type Item = I::Item; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
let mut inner = self.inner.borrow_mut(); | ||
// peek if next value is in group | ||
let peek_val = *inner.iter.peek()?; | ||
if (inner.group_by_fn)(&peek_val) == self.group_key { | ||
inner.iter.next() | ||
} else { | ||
None | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
||
fn group_by_collect<I: Iterator<Item = u32>>(iter: I) -> Vec<(I::Item, Vec<I::Item>)> { | ||
iter.group_by(|val| val / 10) | ||
.map(|(el, iter)| (el, iter.collect::<Vec<_>>())) | ||
.collect::<Vec<_>>() | ||
} | ||
|
||
#[test] | ||
fn group_by_two_groups() { | ||
let vals = vec![1u32, 4, 15]; | ||
let grouped_vals = group_by_collect(vals.into_iter()); | ||
assert_eq!(grouped_vals, vec![(0, vec![1, 4]), (1, vec![15])]); | ||
} | ||
|
||
#[test] | ||
fn group_by_test_empty() { | ||
let vals = vec![]; | ||
let grouped_vals = group_by_collect(vals.into_iter()); | ||
assert_eq!(grouped_vals, vec![]); | ||
} | ||
|
||
#[test] | ||
fn group_by_three_groups() { | ||
let vals = vec![1u32, 4, 15, 1]; | ||
let grouped_vals = group_by_collect(vals.into_iter()); | ||
assert_eq!( | ||
grouped_vals, | ||
vec![(0, vec![1, 4]), (1, vec![15]), (0, vec![1])] | ||
); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.