From 6617151cf361c99a4b1640a89804f831c340d0bd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 28 Feb 2025 15:11:57 -0500 Subject: [PATCH 1/4] Minor: improve documentation of AggregateMode --- .../physical-plan/src/aggregates/mod.rs | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 0947a2ff5539..1000a8ced3b7 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -57,41 +57,53 @@ mod row_hash; mod topk; mod topk_stream; -/// Hash aggregate modes +/// Aggregation modes /// /// See [`Accumulator::state`] for background information on multi-phase /// aggregation and how these modes are used. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum AggregateMode { + /// One of multiple layers of aggregation, any input partitioning + /// /// Partial aggregate that can be applied in parallel across input /// partitions. /// /// This is the first phase of a multi-phase aggregation. Partial, + /// *Final* of multiple layers of aggregation, in exactly one partition + /// /// Final aggregate that produces a single partition of output by combining /// the output of multiple partial aggregates. /// /// This is the second phase of a multi-phase aggregation. + /// + /// This mode requires that the input is a single partition Final, + /// *Final* of multiple layers of aggregation, input is *Partitioned* + /// /// Final aggregate that works on pre-partitioned data. /// - /// This requires the invariant that all rows with a particular - /// grouping key are in the same partitions, such as is the case - /// with Hash repartitioning on the group keys. 
If a group key is - /// duplicated, duplicate groups would be produced + /// This mode requires that all rows with a particular grouping key are in + /// the same partition, such as is the case with Hash repartitioning on the + /// group keys. If a group key is duplicated, duplicate groups would be + /// produced FinalPartitioned, + /// *Single* layer of Aggregation, input in exactly one partition + /// /// Applies the entire logical aggregation operation in a single operator, /// as opposed to Partial / Final modes which apply the logical aggregation using /// two operators. /// /// This mode requires that the input is a single partition (like Final) Single, + /// *Single* layer of Aggregation, input is *Partitioned* + /// /// Applies the entire logical aggregation operation in a single operator, - /// as opposed to Partial / Final modes which apply the logical aggregation using - /// two operators. + /// as opposed to Partial / Final modes which apply the logical aggregation + /// using two operators. /// - /// This mode requires that the input is partitioned by group key (like - /// FinalPartitioned) + /// This mode requires that the input has more than one partition, and is + /// partitioned by group key (like FinalPartitioned). SinglePartitioned, } From 8f95ffbf4c0118fc091ee700d5d0c45f0c74cb75 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 28 Feb 2025 17:04:12 -0500 Subject: [PATCH 2/4] Update datafusion/physical-plan/src/aggregates/mod.rs Co-authored-by: wiedld --- datafusion/physical-plan/src/aggregates/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 1000a8ced3b7..ad7e7c9669b2 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -88,7 +88,7 @@ pub enum AggregateMode { /// group keys. 
If a group key is duplicated, duplicate groups would be /// produced FinalPartitioned, - /// *Single* layer of Aggregation, input in exactly one partition + /// *Single* layer of Aggregation, input is exactly one partition /// /// Applies the entire logical aggregation operation in a single operator, /// as opposed to Partial / Final modes which apply the logical aggregation using From 68ca5d59c6df4062d0addc9d779512b0f12b4428 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 1 Mar 2025 08:09:54 -0500 Subject: [PATCH 3/4] Update datafusion/physical-plan/src/aggregates/mod.rs Co-authored-by: Yongting You <2010youy01@gmail.com> --- datafusion/physical-plan/src/aggregates/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index ad7e7c9669b2..5a9a28e64372 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -78,6 +78,11 @@ pub enum AggregateMode { /// This is the second phase of a multi-phase aggregation. /// /// This mode requires that the input is a single partition + /// + /// Note: Adjacent `Partial` and `Final` mode aggregation is equivalent to a `Single` + /// mode aggregation node. The `Final` mode is required since this is used in an + /// intermediate step. The [`CombinePartialFinalAggregate`] physical optimizer rule + /// will replace this combination with `Single` mode for more efficient execution. 
Final, /// *Final* of multiple layers of aggregation, input is *Partitioned* /// From 2530cc19b09daf893b4e0b3c892aa3264a1d403c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 3 Mar 2025 08:53:01 -0500 Subject: [PATCH 4/4] chore: fix link --- datafusion/physical-plan/src/aggregates/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 5a9a28e64372..bac9a4287a73 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -83,6 +83,8 @@ pub enum AggregateMode { /// mode aggregation node. The `Final` mode is required since this is used in an /// intermediate step. The [`CombinePartialFinalAggregate`] physical optimizer rule /// will replace this combination with `Single` mode for more efficient execution. + /// + /// [`CombinePartialFinalAggregate`]: https://docs.rs/datafusion/latest/datafusion/physical_optimizer/combine_partial_final_agg/struct.CombinePartialFinalAggregate.html Final, /// *Final* of multiple layers of aggregation, input is *Partitioned* ///