From 6071a18966fe0646638198849e692295d2e26f03 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 13 Feb 2026 14:56:00 -0500 Subject: [PATCH 1/2] Minor: Update comments on OptimizerRule about function name matching --- datafusion/optimizer/src/optimizer.rs | 38 ++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 118ddef49b7e7..46f130907b1f0 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -58,12 +58,12 @@ use crate::simplify_expressions::SimplifyExpressions; use crate::single_distinct_to_groupby::SingleDistinctToGroupBy; use crate::utils::log_plan; -/// `OptimizerRule`s transforms one [`LogicalPlan`] into another which -/// computes the same results, but in a potentially more efficient -/// way. If there are no suitable transformations for the input plan, -/// the optimizer should simply return it unmodified. +/// Transforms one [`LogicalPlan`] into another which computes the same results, +/// but in a potentially more efficient way. /// -/// To change the semantics of a `LogicalPlan`, see [`AnalyzerRule`] +/// See notes on [`Self::rewrite`] for details on how to implement an `OptimizerRule`. +/// +/// To change the semantics of a `LogicalPlan`, see [`AnalyzerRule`]. /// /// Use [`SessionState::add_optimizer_rule`] to register additional /// `OptimizerRule`s. @@ -88,8 +88,32 @@ pub trait OptimizerRule: Debug { true } - /// Try to rewrite `plan` to an optimized form, returning `Transformed::yes` - /// if the plan was rewritten and `Transformed::no` if it was not. + /// Try to rewrite `plan` to an optimized form, returning [`Transformed::yes`] + /// if the plan was rewritten and [`Transformed::no`] if it was not. + /// + /// # Notes for implementations: + /// + /// # Return the same plan if no changes were made + /// + /// If there are no suitable transformations for the input plan, + /// the optimizer should simply return it unmodified. + /// + /// The optimizer will call `rewrite` several times until a fixed point is + /// reached, so it is important that `rewrite` return [`Transformed::no`] if + /// the output is the same. + /// + /// ## Matching on functions + /// + /// The rule should avoid function specific transformations, and instead use + /// methods on [`ScalarUDFImpl`], [`AggregateUDFImpl`]. Specifically the + /// rule should not check function names as functions can be overridden, and + /// may not have the same semantics as the functions provided with + /// DataFusion. + /// + /// For example if a rule rewrites a function based on the check + /// `func.name() == 'sum'`, if rewrite the plan incorrectly if the + /// registered `sum` function has different behavior / semantics (for + /// example the sum from the datafusion-spark crate) fn rewrite( &self, _plan: LogicalPlan, From 6759858d4a53c315552f01c2a2fbba8903704b56 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 13 Feb 2026 15:24:21 -0500 Subject: [PATCH 2/2] fix grammar, links --- datafusion/optimizer/src/optimizer.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 46f130907b1f0..2ccba360cf9b3 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -93,7 +93,7 @@ pub trait OptimizerRule: Debug { /// /// # Notes for implementations: /// - /// # Return the same plan if no changes were made + /// ## Return the same plan if no changes were made /// /// If there are no suitable transformations for the input plan, /// the optimizer should simply return it unmodified. @@ -104,16 +104,19 @@ pub trait OptimizerRule: Debug { /// /// ## Matching on functions /// - /// The rule should avoid function specific transformations, and instead use - /// methods on [`ScalarUDFImpl`], [`AggregateUDFImpl`]. Specifically the + /// The rule should avoid function-specific transformations, and instead use + /// methods on [`ScalarUDFImpl`] and [`AggregateUDFImpl`]. Specifically, the /// rule should not check function names as functions can be overridden, and /// may not have the same semantics as the functions provided with /// DataFusion. /// - /// For example if a rule rewrites a function based on the check - /// `func.name() == 'sum'`, if rewrite the plan incorrectly if the - /// registered `sum` function has different behavior / semantics (for - /// example the sum from the datafusion-spark crate) + /// For example, if a rule rewrites a function based on the check + /// `func.name() == "sum"`, it may rewrite the plan incorrectly if the + /// registered `sum` function has different semantics (for example, the + /// `sum` function from the `datafusion-spark` crate). + /// + /// [`ScalarUDFImpl`]: datafusion_expr::ScalarUDFImpl + /// [`AggregateUDFImpl`]: datafusion_expr::ScalarUDFImpl fn rewrite( &self, _plan: LogicalPlan,