Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge Merge/MergeBy/MergeJoinBy implementations (v2) #736

Merged
merged 5 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1075,7 +1075,6 @@ pub trait Itertools: Iterator {
where
J: IntoIterator,
F: FnMut(&Self::Item, &J::Item) -> T,
T: merge_join::OrderingOrBool<Self::Item, J::Item>,
Self: Sized,
{
merge_join_by(self, other, cmp_fn)
Expand Down
232 changes: 102 additions & 130 deletions src/merge_join.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::cmp::Ordering;
use std::fmt;
use std::iter::Fuse;
use std::iter::{FusedIterator, Peekable};
use std::iter::{Fuse, FusedIterator};
use std::marker::PhantomData;

use either::Either;

Expand All @@ -11,19 +11,9 @@ use crate::size_hint::{self, SizeHint};
#[cfg(doc)]
use crate::Itertools;

/// A stateful predicate over two items of the same type `T`, used by the
/// peekable `MergeBy` adaptor to choose which input to advance.
pub trait MergePredicate<T> {
/// Compares the two peeked front items. In `MergeBy::next`, a `true` result
/// makes the adaptor yield the next item of the first iterator (`a`), and
/// `false` the next item of the second (`b`).
fn merge_pred(&mut self, a: &T, b: &T) -> bool;
}

/// Unit-struct merge predicate; its trait impls in this file compare the two
/// candidate items with `<=`.
#[derive(Clone, Debug)]
pub struct MergeLte;

/// `MergeLte` works for any `T: PartialOrd` and keeps no state.
impl<T: PartialOrd> MergePredicate<T> for MergeLte {
/// Returns `true` exactly when `a <= b`.
fn merge_pred(&mut self, a: &T, b: &T) -> bool {
a <= b
}
}

/// An iterator adaptor that merges the two base iterators in ascending order.
/// If both base iterators are sorted (ascending), the result is sorted.
///
Expand Down Expand Up @@ -62,103 +52,25 @@ where
///
/// See [`.merge_by()`](crate::Itertools::merge_by) for more information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct MergeBy<I, J, F>
where
I: Iterator,
J: Iterator<Item = I::Item>,
{
/// First input; peekable so its front item can be compared without consuming it.
a: Peekable<I>,
/// Second input; peekable for the same reason as `a`.
b: Peekable<J>,
/// `None` until `next` first observes exactly one input with no front item.
/// From then on it is `Some(true)` (keep taking from `a`) or `Some(false)`
/// (keep taking from `b`), and `next` stops peeking and comparing.
fused: Option<bool>,
/// The merge predicate; `next` invokes it through `MergePredicate::merge_pred`.
cmp: F,
}

// `Debug` for the peekable `MergeBy`. The bounds ask for `Debug` on both
// iterators and on the item type, but place no bound on `F`.
impl<I, J, F> fmt::Debug for MergeBy<I, J, F>
where
I: Iterator + fmt::Debug,
J: Iterator<Item = I::Item> + fmt::Debug,
I::Item: fmt::Debug,
{
// Only `a` and `b` are passed to the macro; `fused` and `cmp` are left out.
// NOTE(review): `debug_fmt_fields!` is defined elsewhere — presumably it
// generates `fmt` printing the named fields; confirm against its definition.
debug_fmt_fields!(MergeBy, a, b);
}

impl<T, F: FnMut(&T, &T) -> bool> MergePredicate<T> for F {
fn merge_pred(&mut self, a: &T, b: &T) -> bool {
self(a, b)
}
pub struct MergeBy<I: Iterator, J: Iterator, F> {
left: PutBack<Fuse<I>>,
right: PutBack<Fuse<J>>,
cmp_fn: F,
}

/// Create a `MergeBy` iterator.
pub fn merge_by_new<I, J, F>(a: I, b: J, cmp: F) -> MergeBy<I::IntoIter, J::IntoIter, F>
where
I: IntoIterator,
J: IntoIterator<Item = I::Item>,
F: MergePredicate<I::Item>,
{
MergeBy {
a: a.into_iter().peekable(),
b: b.into_iter().peekable(),
fused: None,
cmp,
left: put_back(a.into_iter().fuse()),
right: put_back(b.into_iter().fuse()),
cmp_fn: cmp,
}
}

// `Clone` for the peekable `MergeBy`. The bounds are placed on the stored
// `Peekable<_>` wrappers rather than on `I`/`J` directly, so whatever std
// requires for `Peekable: Clone` is what this impl requires.
impl<I, J, F> Clone for MergeBy<I, J, F>
where
I: Iterator,
J: Iterator<Item = I::Item>,
Peekable<I>: Clone,
Peekable<J>: Clone,
F: Clone,
{
// All four fields are named here.
// NOTE(review): `clone_fields!` is defined elsewhere — presumably it
// generates a field-by-field `clone`; confirm against its definition.
clone_fields!(a, b, fused, cmp);
}

/// Merges the two peekable inputs, consulting the predicate only while both
/// inputs still have a front item.
impl<I, J, F> Iterator for MergeBy<I, J, F>
where
    I: Iterator,
    J: Iterator<Item = I::Item>,
    F: MergePredicate<I::Item>,
{
    type Item = I::Item;

    /// Yields the next merged item.
    ///
    /// Once exactly one input has been observed without a front item, the
    /// choice of side is cached in `fused` and the predicate is no longer
    /// called.
    fn next(&mut self) -> Option<Self::Item> {
        let take_first = if let Some(decided) = self.fused {
            decided
        } else {
            match (self.a.peek(), self.b.peek()) {
                (None, None) => return None,
                (Some(x), Some(y)) => self.cmp.merge_pred(x, y),
                (x, _) => {
                    // Exactly one side has a front item: remember which one.
                    let first_remains = x.is_some();
                    self.fused = Some(first_remains);
                    first_remains
                }
            }
        };

        if take_first {
            self.a.next()
        } else {
            self.b.next()
        }
    }

    /// Sum of both inputs' size hints.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let first = self.a.size_hint();
        let second = self.b.size_hint();
        // Not ExactSizeIterator because size may be larger than usize
        size_hint::add(first, second)
    }
}

// Marker impl: the peekable `MergeBy` is declared fused only when both inputs
// are `FusedIterator`s. `next` polls the inputs again on every call, including
// after it has returned `None`, so the guarantee has to come from the inputs.
impl<I, J, F> FusedIterator for MergeBy<I, J, F>
where
I: FusedIterator,
J: FusedIterator<Item = I::Item>,
F: MergePredicate<I::Item>,
{
}

/// Return an iterator adaptor that merge-joins items from the two base iterators in ascending order.
///
/// [`IntoIterator`] enabled version of [`Itertools::merge_join_by`].
Expand All @@ -171,46 +83,54 @@ where
I: IntoIterator,
J: IntoIterator,
F: FnMut(&I::Item, &J::Item) -> T,
T: OrderingOrBool<I::Item, J::Item>,
{
MergeJoinBy {
MergeBy {
left: put_back(left.into_iter().fuse()),
right: put_back(right.into_iter().fuse()),
cmp_fn,
cmp_fn: MergeFuncLR(cmp_fn, PhantomData),
}
}

/// An iterator adaptor that merge-joins items from the two base iterators in ascending order.
///
/// See [`.merge_join_by()`](crate::Itertools::merge_join_by) for more information.
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
pub struct MergeJoinBy<I: Iterator, J: Iterator, F> {
left: PutBack<Fuse<I>>,
right: PutBack<Fuse<J>>,
cmp_fn: F,
pub type MergeJoinBy<I, J, F> =
MergeBy<I, J, MergeFuncLR<F, <F as FuncLR<<I as Iterator>::Item, <J as Iterator>::Item>>::T>>;

/// Wrapper around a comparison function `F`, tagged with a marker type `T`.
///
/// `merge_join_by` builds it as `MergeFuncLR(cmp_fn, PhantomData)`. The tag is
/// what allows separate `OrderingOrBool` impls for `MergeFuncLR<F, Ordering>`
/// and `MergeFuncLR<F, bool>`.
#[derive(Clone, Debug)]
pub struct MergeFuncLR<F, T>(F, PhantomData<T>);

/// Helper trait that exposes a type associated with a function over `&L` and
/// `&R` as the associated type `T`, so it can be named in type position (as
/// the `MergeJoinBy` alias does).
pub trait FuncLR<L, R> {
/// For the blanket impl over `FnMut(&L, &R) -> T`, this is the closure's return type.
type T;
}

/// Blanket impl: every `FnMut(&L, &R) -> T` reports its return type `T`
/// through `FuncLR::T`.
impl<L, R, T, F: FnMut(&L, &R) -> T> FuncLR<L, R> for F {
type T = T;
}

pub trait OrderingOrBool<L, R> {
type Out;
jswrenn marked this conversation as resolved.
Show resolved Hide resolved
type MergeResult;
fn left(left: L) -> Self::MergeResult;
fn right(right: R) -> Self::MergeResult;
// "merge" never returns (Some(...), Some(...), ...) so Option<Either<I::Item, J::Item>>
// is appealing but it is always followed by two put_backs, so we think the compiler is
// smart enough to optimize it. Or we could move put_backs into "merge".
fn merge(self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult);
fn merge(&mut self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult);
fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint;
}

impl<L, R> OrderingOrBool<L, R> for Ordering {
impl<L, R, F: FnMut(&L, &R) -> Ordering> OrderingOrBool<L, R> for MergeFuncLR<F, Ordering> {
type Out = Ordering;
type MergeResult = EitherOrBoth<L, R>;
fn left(left: L) -> Self::MergeResult {
EitherOrBoth::Left(left)
}
fn right(right: R) -> Self::MergeResult {
EitherOrBoth::Right(right)
}
fn merge(self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult) {
match self {
fn merge(&mut self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult) {
match self.0(&left, &right) {
Ordering::Equal => (None, None, EitherOrBoth::Both(left, right)),
Ordering::Less => (None, Some(right), EitherOrBoth::Left(left)),
Ordering::Greater => (Some(left), None, EitherOrBoth::Right(right)),
Expand All @@ -228,16 +148,17 @@ impl<L, R> OrderingOrBool<L, R> for Ordering {
}
}

impl<L, R> OrderingOrBool<L, R> for bool {
impl<L, R, F: FnMut(&L, &R) -> bool> OrderingOrBool<L, R> for MergeFuncLR<F, bool> {
type Out = bool;
type MergeResult = Either<L, R>;
fn left(left: L) -> Self::MergeResult {
Either::Left(left)
}
fn right(right: R) -> Self::MergeResult {
Either::Right(right)
}
fn merge(self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult) {
if self {
fn merge(&mut self, left: L, right: R) -> (Option<L>, Option<R>, Self::MergeResult) {
if self.0(&left, &right) {
(None, Some(right), Either::Left(left))
} else {
(Some(left), None, Either::Right(right))
Expand All @@ -249,7 +170,51 @@ impl<L, R> OrderingOrBool<L, R> for bool {
}
}

impl<I, J, F> Clone for MergeJoinBy<I, J, F>
/// Lets a plain `FnMut(&T, &T) -> bool` predicate drive a merge of two
/// iterators with the same item type; merged items are yielded unwrapped.
impl<T, F> OrderingOrBool<T, T> for F
where
    F: FnMut(&T, &T) -> bool,
{
    type Out = bool;
    type MergeResult = T;

    /// An item taken from the left input is passed through unchanged.
    fn left(left: T) -> Self::MergeResult {
        left
    }

    /// An item taken from the right input is passed through unchanged.
    fn right(right: T) -> Self::MergeResult {
        right
    }

    /// Calls the predicate on both items. The tuple holds the item to put
    /// back on the left, the item to put back on the right, and the item to
    /// yield: `left` is yielded when the predicate is `true`, else `right`.
    fn merge(&mut self, left: T, right: T) -> (Option<T>, Option<T>, Self::MergeResult) {
        let left_first = self(&left, &right);
        if !left_first {
            return (Some(left), None, right);
        }
        (None, Some(right), left)
    }

    /// Every item of both inputs is yielded, so the hints are added.
    fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint {
        // Not ExactSizeIterator because size may be larger than usize
        size_hint::add(left, right)
    }
}

/// Lets `MergeLte` drive a merge of two iterators with the same `PartialOrd`
/// item type; merged items are yielded unwrapped.
impl<T> OrderingOrBool<T, T> for MergeLte
where
    T: PartialOrd,
{
    type Out = bool;
    type MergeResult = T;

    /// An item taken from the left input is passed through unchanged.
    fn left(left: T) -> Self::MergeResult {
        left
    }

    /// An item taken from the right input is passed through unchanged.
    fn right(right: T) -> Self::MergeResult {
        right
    }

    /// Compares with `<=`. The tuple holds the item to put back on the left,
    /// the item to put back on the right, and the item to yield: `left` is
    /// yielded when `left <= right`, else `right`.
    fn merge(&mut self, left: T, right: T) -> (Option<T>, Option<T>, Self::MergeResult) {
        let left_first = left <= right;
        if !left_first {
            return (Some(left), None, right);
        }
        (None, Some(right), left)
    }

    /// Every item of both inputs is yielded, so the hints are added.
    fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint {
        // Not ExactSizeIterator because size may be larger than usize
        size_hint::add(left, right)
    }
}

impl<I, J, F> Clone for MergeBy<I, J, F>
where
I: Iterator,
J: Iterator,
Expand All @@ -260,32 +225,31 @@ where
clone_fields!(left, right, cmp_fn);
}

impl<I, J, F> fmt::Debug for MergeJoinBy<I, J, F>
impl<I, J, F> fmt::Debug for MergeBy<I, J, F>
where
I: Iterator + fmt::Debug,
I::Item: fmt::Debug,
J: Iterator + fmt::Debug,
J::Item: fmt::Debug,
{
debug_fmt_fields!(MergeJoinBy, left, right);
debug_fmt_fields!(MergeBy, left, right);
}

impl<I, J, F, T> Iterator for MergeJoinBy<I, J, F>
impl<I, J, F, T> Iterator for MergeBy<I, J, F>
where
I: Iterator,
J: Iterator,
F: FnMut(&I::Item, &J::Item) -> T,
T: OrderingOrBool<I::Item, J::Item>,
F: OrderingOrBool<I::Item, J::Item, Out = T>,
{
type Item = T::MergeResult;
type Item = F::MergeResult;

fn next(&mut self) -> Option<Self::Item> {
match (self.left.next(), self.right.next()) {
(None, None) => None,
(Some(left), None) => Some(T::left(left)),
(None, Some(right)) => Some(T::right(right)),
(Some(left), None) => Some(F::left(left)),
(None, Some(right)) => Some(F::right(right)),
(Some(left), Some(right)) => {
let (left, right, next) = (self.cmp_fn)(&left, &right).merge(left, right);
let (left, right, next) = self.cmp_fn.merge(left, right);
if let Some(left) = left {
self.left.put_back(left);
}
Expand All @@ -298,7 +262,7 @@ where
}

fn size_hint(&self) -> SizeHint {
T::size_hint(self.left.size_hint(), self.right.size_hint())
F::size_hint(self.left.size_hint(), self.right.size_hint())
}

fn count(mut self) -> usize {
Expand All @@ -310,7 +274,7 @@ where
(None, Some(_right)) => break count + 1 + self.right.into_parts().1.count(),
(Some(left), Some(right)) => {
count += 1;
let (left, right, _) = (self.cmp_fn)(&left, &right).merge(left, right);
let (left, right, _) = self.cmp_fn.merge(left, right);
if let Some(left) = left {
self.left.put_back(left);
}
Expand All @@ -328,13 +292,13 @@ where
match (self.left.next(), self.right.next()) {
(None, None) => break previous_element,
(Some(left), None) => {
break Some(T::left(self.left.into_parts().1.last().unwrap_or(left)))
break Some(F::left(self.left.into_parts().1.last().unwrap_or(left)))
}
(None, Some(right)) => {
break Some(T::right(self.right.into_parts().1.last().unwrap_or(right)))
break Some(F::right(self.right.into_parts().1.last().unwrap_or(right)))
}
(Some(left), Some(right)) => {
let (left, right, elem) = (self.cmp_fn)(&left, &right).merge(left, right);
let (left, right, elem) = self.cmp_fn.merge(left, right);
if let Some(left) = left {
self.left.put_back(left);
}
Expand All @@ -355,10 +319,10 @@ where
n -= 1;
match (self.left.next(), self.right.next()) {
(None, None) => break None,
(Some(_left), None) => break self.left.nth(n).map(T::left),
(None, Some(_right)) => break self.right.nth(n).map(T::right),
(Some(_left), None) => break self.left.nth(n).map(F::left),
(None, Some(_right)) => break self.right.nth(n).map(F::right),
(Some(left), Some(right)) => {
let (left, right, _) = (self.cmp_fn)(&left, &right).merge(left, right);
let (left, right, _) = self.cmp_fn.merge(left, right);
if let Some(left) = left {
self.left.put_back(left);
}
Expand All @@ -370,3 +334,11 @@ where
}
}
}

/// Marker impl with no `FusedIterator` bound on the inputs: both are stored
/// behind `Fuse`, and `next` puts an item back only after having pulled one
/// from each side, so `None` keeps being returned once both are exhausted.
impl<I, J, F> FusedIterator for MergeBy<I, J, F>
where
    I: Iterator,
    J: Iterator,
    // `Out` is left unnamed: this impl does not use it, and the matching
    // `Iterator` impl is selected with `Out = F::Out`.
    F: OrderingOrBool<I::Item, J::Item>,
{
}