From f8dd65a0bcdc2404406ad304912784ed3e5ff0d5 Mon Sep 17 00:00:00 2001 From: irenjj Date: Sun, 16 Mar 2025 00:24:05 +0800 Subject: [PATCH 01/15] Simplify display format of `AggregateFunctionExpr` --- datafusion/core/src/physical_planner.rs | 17 ++++-- datafusion/expr/src/expr.rs | 63 +++++++++++++++++++++++ datafusion/expr/src/udaf.rs | 44 +++++++++++++++- datafusion/physical-expr/src/aggregate.rs | 15 ++++++ 4 files changed, 133 insertions(+), 6 deletions(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 135b32a0a8d7..35e876fe1fe2 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1588,6 +1588,7 @@ type AggregateExprWithOptionalArgs = ( pub fn create_aggregate_expr_with_name_and_maybe_filter( e: &Expr, name: Option, + sql_name: String, logical_input_schema: &DFSchema, physical_input_schema: &Schema, execution_props: &ExecutionProps, @@ -1642,6 +1643,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( .order_by(ordering_reqs) .schema(Arc::new(physical_input_schema.to_owned())) .alias(name) + .sql_name(sql_name) .with_ignore_nulls(ignore_nulls) .with_distinct(*distinct) .build() @@ -1664,15 +1666,22 @@ pub fn create_aggregate_expr_and_maybe_filter( execution_props: &ExecutionProps, ) -> Result { // unpack (nested) aliased logical expressions, e.g. "sum(col) as total" - let (name, e) = match e { - Expr::Alias(Alias { expr, name, .. }) => (Some(name.clone()), expr.as_ref()), - Expr::AggregateFunction(_) => (Some(e.schema_name().to_string()), e), - _ => (None, e), + let (name, sql_name, e) = match e { + Expr::Alias(Alias { expr, name, .. }) => { + (Some(name.clone()), String::default(), expr.as_ref()) + } + Expr::AggregateFunction(_) => ( + Some(e.schema_name().to_string()), + e.sql_name().to_string(), + e, + ), + _ => (None, String::default(), e), }; create_aggregate_expr_with_name_and_maybe_filter( e, name, + sql_name, logical_input_schema, physical_input_schema, execution_props, diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 6f110895a40a..ab227240b73b 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1147,6 +1147,10 @@ impl Expr { SchemaDisplay(self) } + pub fn sql_name(&self) -> impl Display + '_ { + SqlDisplay(self) + } + /// Returns the qualifier and the schema name of this expression. /// /// Used when the expression forms the output field of a certain plan. @@ -2596,6 +2600,43 @@ impl Display for SchemaDisplay<'_> { } } +struct SqlDisplay<'a>(&'a Expr); +impl Display for SqlDisplay<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self.0 { + Expr::Column(c) => { + write!(f, "{}", c.name) + } + Expr::Literal(_) => { + write!(f, "aa") + } + Expr::ScalarVariable(..) => { + write!(f, "bb") + } + Expr::OuterReferenceColumn(..) => { + write!(f, "cc") + } + Expr::Placeholder(_) => { + write!(f, "dd") + } + Expr::Wildcard { .. } => { + write!(f, "ee") + } + Expr::AggregateFunction(AggregateFunction { func, params }) => { + match func.sql_name(params) { + Ok(name) => { + write!(f, "{name}") + } + Err(e) => { + write!(f, "got error from schema_name {}", e) + } + } + } + _ => write!(f, "{}", self.0.schema_name()), + } + } +} + /// Get schema_name for Vector of expressions /// /// Internal usage. Please call `schema_name_from_exprs` instead @@ -2607,11 +2648,21 @@ pub(crate) fn schema_name_from_exprs_comma_separated_without_space( schema_name_from_exprs_inner(exprs, ",") } +pub(crate) fn sql_name_from_exprs_comma_separated_without_space( + exprs: &[Expr], +) -> Result { + sql_name_from_exprs_inner(exprs, ",") +} + /// Get schema_name for Vector of expressions pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result { schema_name_from_exprs_inner(exprs, ", ") } +pub fn sql_name_from_exprs(exprs: &[Expr]) -> Result { + sql_name_from_exprs_inner(exprs, ", ") +} + fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result { let mut s = String::new(); for (i, e) in exprs.iter().enumerate() { @@ -2624,6 +2675,18 @@ fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result Result { + let mut s = String::new(); + for (i, e) in exprs.iter().enumerate() { + if i > 0 { + write!(&mut s, "{sep}")?; + } + write!(&mut s, "{}", SqlDisplay(e))?; + } + + Ok(s) +} + pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result { let mut s = String::new(); for (i, e) in sorts.iter().enumerate() { diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index f9039cea2edc..3a394fbefa7c 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -31,8 +31,8 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use crate::expr::{ schema_name_from_exprs, schema_name_from_exprs_comma_separated_without_space, - schema_name_from_sorts, AggregateFunction, AggregateFunctionParams, - WindowFunctionParams, + schema_name_from_sorts, sql_name_from_exprs_comma_separated_without_space, + AggregateFunction, AggregateFunctionParams, WindowFunctionParams, }; use crate::function::{ AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs, @@ -175,6 +175,10 @@ impl AggregateUDF { self.inner.schema_name(params) } + pub fn sql_name(&self, params: &AggregateFunctionParams) -> Result { + self.inner.sql_name(params) + } + pub fn window_function_schema_name( &self, params: &WindowFunctionParams, @@ -452,6 +456,42 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { Ok(schema_name) } + fn sql_name(&self, params: &AggregateFunctionParams) -> Result { + let AggregateFunctionParams { + args, + distinct, + filter, + order_by, + null_treatment, + } = params; + + let mut schema_name = String::new(); + + schema_name.write_fmt(format_args!( + "{}({}{})", + self.name(), + if *distinct { "DISTINCT " } else { "" }, + sql_name_from_exprs_comma_separated_without_space(args)? + ))?; + + if let Some(null_treatment) = null_treatment { + schema_name.write_fmt(format_args!(" {}", null_treatment))?; + } + + if let Some(filter) = filter { + schema_name.write_fmt(format_args!(" FILTER (WHERE {filter})"))?; + }; + + if let Some(order_by) = order_by { + schema_name.write_fmt(format_args!( + " ORDER BY [{}]", + schema_name_from_sorts(order_by)? + ))?; + }; + + Ok(schema_name) + } + /// Returns the name of the column this expression would create /// /// See [`Expr::schema_name`] for details diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index 34c4e52d517e..4cfa2730adcc 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -65,6 +65,7 @@ pub struct AggregateExprBuilder { /// Physical expressions of the aggregate function args: Vec>, alias: Option, + sql_name: String, /// Arrow Schema for the aggregate function schema: SchemaRef, /// The physical order by expressions @@ -83,6 +84,7 @@ impl AggregateExprBuilder { fun, args, alias: None, + sql_name: String::default(), schema: Arc::new(Schema::empty()), ordering_req: LexOrdering::default(), ignore_nulls: false, @@ -99,6 +101,7 @@ impl AggregateExprBuilder { fun, args, alias, + sql_name, schema, ordering_req, ignore_nulls, @@ -148,6 +151,7 @@ impl AggregateExprBuilder { args, data_type, name, + sql_name, schema: Arc::unwrap_or_clone(schema), ordering_req, ignore_nulls, @@ -164,6 +168,11 @@ impl AggregateExprBuilder { self } + pub fn sql_name(mut self, name: String) -> Self { + self.sql_name = name; + self + } + pub fn schema(mut self, schema: SchemaRef) -> Self { self.schema = schema; self @@ -215,6 +224,7 @@ pub struct AggregateFunctionExpr { /// Output / return type of this aggregate data_type: DataType, name: String, + sql_name: String, schema: Schema, // The physical order by expressions ordering_req: LexOrdering, @@ -245,6 +255,11 @@ impl AggregateFunctionExpr { &self.name } + /// Simplified name for `tree` explain. + pub fn sql_name(&self) -> &str { + &self.sql_name + } + /// Return if the aggregation is distinct pub fn is_distinct(&self) -> bool { self.is_distinct From 3610bf5a37912d49699a1e5d35de4e97af00fb09 Mon Sep 17 00:00:00 2001 From: irenjj Date: Mon, 17 Mar 2025 22:13:30 +0800 Subject: [PATCH 02/15] add more info for SqlDisplay --- datafusion/expr/src/expr.rs | 160 ++++++++++++++++-- .../physical-plan/src/aggregates/mod.rs | 2 +- .../sqllogictest/test_files/explain_tree.slt | 108 ++++++------ 3 files changed, 202 insertions(+), 68 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index ab227240b73b..58f681cdb841 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2604,23 +2604,159 @@ struct SqlDisplay<'a>(&'a Expr); impl Display for SqlDisplay<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self.0 { - Expr::Column(c) => { - write!(f, "{}", c.name) + Expr::Column(col) => { + write!(f, "{}", col.name) } - Expr::Literal(_) => { - write!(f, "aa") + Expr::Literal(scalar) => scalar.fmt(f), + Expr::Placeholder(holder) => { + write!(f, "{}", holder.id) } - Expr::ScalarVariable(..) => { - write!(f, "bb") + Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), + Expr::Between(Between { + expr, + negated, + low, + high, + }) => { + if *negated { + write!( + f, + "{} NOT BETWEEN {} AND {}", + SqlDisplay(expr), + SqlDisplay(low), + SqlDisplay(high), + ) + } else { + write!( + f, + "{} BETWEEN {} AND {}", + SqlDisplay(expr), + SqlDisplay(low), + SqlDisplay(high), + ) + } + } + Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),) + } + Expr::Case(Case { + expr, + when_then_expr, + else_expr, + }) => { + write!(f, "CASE ")?; + + if let Some(e) = expr { + write!(f, "{} ", SqlDisplay(e))?; + } + + for (when, then) in when_then_expr { + write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?; + } + + if let Some(e) = else_expr { + write!(f, "ELSE {} ", SqlDisplay(e))?; + } + + write!(f, "END") + } + Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => { + write!(f, "{}", SqlDisplay(expr)) + } + Expr::InList(InList { + expr, + list, + negated, + }) => { + let inlist_name = sql_name_from_exprs(list)?; + + if *negated { + write!(f, "{} NOT IN {}", SqlDisplay(expr), inlist_name) + } else { + write!(f, "{} IN {}", SqlDisplay(expr), inlist_name) + } } - Expr::OuterReferenceColumn(..) => { - write!(f, "cc") + Expr::GroupingSet(GroupingSet::Cube(exprs)) => { + write!(f, "ROLLUP ({})", sql_name_from_exprs(exprs)?) } - Expr::Placeholder(_) => { - write!(f, "dd") + Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => { + write!(f, "GROUPING SETS (")?; + for exprs in lists_of_exprs.iter() { + write!(f, "({})", sql_name_from_exprs(exprs)?)?; + } + write!(f, ")") } - Expr::Wildcard { .. } => { - write!(f, "ee") + Expr::GroupingSet(GroupingSet::Rollup(exprs)) => { + write!(f, "ROLLUP ({})", sql_name_from_exprs(exprs)?) + } + Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)), + Expr::IsNotNull(expr) => { + write!(f, "{} IS NOT NULL", SqlDisplay(expr)) + } + Expr::IsUnknown(expr) => { + write!(f, "{} IS UNKNOWN", SqlDisplay(expr)) + } + Expr::IsNotUnknown(expr) => { + write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr)) + } + Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)), + Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)), + Expr::IsNotTrue(expr) => { + write!(f, "{} IS NOT TRUE", SqlDisplay(expr)) + } + Expr::IsNotFalse(expr) => { + write!(f, "{} IS NOT FALSE", SqlDisplay(expr)) + } + Expr::Like(Like { + negated, + expr, + pattern, + escape_char, + case_insensitive, + }) => { + write!( + f, + "{} {}{} {}", + SqlDisplay(expr), + if *negated { "NOT " } else { "" }, + if *case_insensitive { "ILIKE" } else { "LIKE" }, + SqlDisplay(pattern), + )?; + + if let Some(char) = escape_char { + write!(f, " CHAR '{char}'")?; + } + + Ok(()) + } + Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)), + Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)), + Expr::Unnest(Unnest { expr }) => { + write!(f, "UNNEST({})", SqlDisplay(expr)) + } + Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + .. + }) => { + write!( + f, + "{} {} {}", + SqlDisplay(expr), + if *negated { + "NOT SIMILAR TO" + } else { + "SIMILAR TO" + }, + SqlDisplay(pattern), + )?; + if let Some(char) = escape_char { + write!(f, " CHAR '{char}'")?; + } + + Ok(()) } Expr::AggregateFunction(AggregateFunction { func, params }) => { match func.sql_name(params) { diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 4230eeeed0c9..882c955dc3e2 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -834,7 +834,7 @@ impl DisplayAs for AggregateExec { let a: Vec = self .aggr_expr .iter() - .map(|agg| agg.name().to_string()) + .map(|agg| agg.sql_name().to_string()) .collect(); writeln!(f, "mode={:?}", self.mode)?; if !g.is_empty() { diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index b9f4074a450d..ce44abc68e7a 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -201,56 +201,54 @@ physical_plan 01)┌───────────────────────────┐ 02)│ AggregateExec │ 03)│ -------------------- │ -04)│ aggr: │ -05)│ sum(table1.bigint_col) │ -06)│ │ -07)│ group_by: │ -08)│ string_col@0 as string_col│ -09)│ │ -10)│ mode: │ -11)│ FinalPartitioned │ -12)└─────────────┬─────────────┘ -13)┌─────────────┴─────────────┐ -14)│ CoalesceBatchesExec │ -15)│ -------------------- │ -16)│ target_batch_size: │ -17)│ 8192 │ -18)└─────────────┬─────────────┘ -19)┌─────────────┴─────────────┐ -20)│ RepartitionExec │ -21)│ -------------------- │ -22)│ output_partition_count: │ -23)│ 4 │ -24)│ │ -25)│ partitioning_scheme: │ -26)│ Hash([string_col@0], 4) │ -27)└─────────────┬─────────────┘ -28)┌─────────────┴─────────────┐ -29)│ AggregateExec │ -30)│ -------------------- │ -31)│ aggr: │ -32)│ sum(table1.bigint_col) │ -33)│ │ -34)│ group_by: │ -35)│ string_col@0 as string_col│ -36)│ │ -37)│ mode: Partial │ -38)└─────────────┬─────────────┘ -39)┌─────────────┴─────────────┐ -40)│ RepartitionExec │ -41)│ -------------------- │ -42)│ output_partition_count: │ -43)│ 1 │ -44)│ │ -45)│ partitioning_scheme: │ -46)│ RoundRobinBatch(4) │ -47)└─────────────┬─────────────┘ -48)┌─────────────┴─────────────┐ -49)│ DataSourceExec │ -50)│ -------------------- │ -51)│ files: 1 │ -52)│ format: csv │ -53)└───────────────────────────┘ +04)│ aggr: sum(bigint_col) │ +05)│ │ +06)│ group_by: │ +07)│ string_col@0 as string_col│ +08)│ │ +09)│ mode: │ +10)│ FinalPartitioned │ +11)└─────────────┬─────────────┘ +12)┌─────────────┴─────────────┐ +13)│ CoalesceBatchesExec │ +14)│ -------------------- │ +15)│ target_batch_size: │ +16)│ 8192 │ +17)└─────────────┬─────────────┘ +18)┌─────────────┴─────────────┐ +19)│ RepartitionExec │ +20)│ -------------------- │ +21)│ output_partition_count: │ +22)│ 4 │ +23)│ │ +24)│ partitioning_scheme: │ +25)│ Hash([string_col@0], 4) │ +26)└─────────────┬─────────────┘ +27)┌─────────────┴─────────────┐ +28)│ AggregateExec │ +29)│ -------------------- │ +30)│ aggr: sum(bigint_col) │ +31)│ │ +32)│ group_by: │ +33)│ string_col@0 as string_col│ +34)│ │ +35)│ mode: Partial │ +36)└─────────────┬─────────────┘ +37)┌─────────────┴─────────────┐ +38)│ RepartitionExec │ +39)│ -------------------- │ +40)│ output_partition_count: │ +41)│ 1 │ +42)│ │ +43)│ partitioning_scheme: │ +44)│ RoundRobinBatch(4) │ +45)└─────────────┬─────────────┘ +46)┌─────────────┴─────────────┐ +47)│ DataSourceExec │ +48)│ -------------------- │ +49)│ files: 1 │ +50)│ format: csv │ +51)└───────────────────────────┘ # Limit @@ -1339,7 +1337,7 @@ physical_plan 12)-----------------------------┌─────────────┴─────────────┐ 13)-----------------------------│ AggregateExec │ 14)-----------------------------│ -------------------- │ -15)-----------------------------│ aggr: count(Int64(1)) │ +15)-----------------------------│ aggr: count(1) │ 16)-----------------------------│ mode: Final │ 17)-----------------------------└─────────────┬─────────────┘ 18)-----------------------------┌─────────────┴─────────────┐ @@ -1348,7 +1346,7 @@ physical_plan 21)-----------------------------┌─────────────┴─────────────┐ 22)-----------------------------│ AggregateExec │ 23)-----------------------------│ -------------------- │ -24)-----------------------------│ aggr: count(Int64(1)) │ +24)-----------------------------│ aggr: count(1) │ 25)-----------------------------│ mode: Partial │ 26)-----------------------------└─────────────┬─────────────┘ 27)-----------------------------┌─────────────┴─────────────┐ @@ -1477,7 +1475,7 @@ physical_plan 07)┌─────────────┴─────────────┐ 08)│ AggregateExec │ 09)│ -------------------- │ -10)│ aggr: count(Int64(1)) │ +10)│ aggr: count(1) │ 11)│ │ 12)│ group_by: │ 13)│ name@0 as name │ @@ -1949,7 +1947,7 @@ physical_plan 07)┌─────────────┴─────────────┐ 08)│ AggregateExec │ 09)│ -------------------- │ -10)│ aggr: count(Int64(1)) │ +10)│ aggr: count(1) │ 11)│ mode: Final │ 12)└─────────────┬─────────────┘ 13)┌─────────────┴─────────────┐ @@ -1958,7 +1956,7 @@ physical_plan 16)┌─────────────┴─────────────┐ 17)│ AggregateExec │ 18)│ -------------------- │ -19)│ aggr: count(Int64(1)) │ +19)│ aggr: count(1) │ 20)│ mode: Partial │ 21)└─────────────┬─────────────┘ 22)┌─────────────┴─────────────┐ From 38d34e2699235502197707b50f4a4da69091d3dd Mon Sep 17 00:00:00 2001 From: irenjj Date: Mon, 17 Mar 2025 22:54:59 +0800 Subject: [PATCH 03/15] simplify aggr expr --- .../physical-plan/src/aggregates/mod.rs | 35 ++- .../sqllogictest/test_files/explain_tree.slt | 204 ++++++++---------- 2 files changed, 119 insertions(+), 120 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 882c955dc3e2..b0b7402fb15d 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -48,6 +48,7 @@ use datafusion_physical_expr::{ PhysicalSortRequirement, }; +use datafusion_physical_expr_common::physical_expr::fmt_sql; use itertools::Itertools; pub(crate) mod group_values; @@ -742,17 +743,18 @@ impl DisplayAs for AggregateExec { t: DisplayFormatType, f: &mut std::fmt::Formatter, ) -> std::fmt::Result { - let format_expr_with_alias = - |(e, alias): &(Arc, String)| -> String { - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } - }; match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { + let format_expr_with_alias = + |(e, alias): &(Arc, String)| -> String { + let e = e.to_string(); + if &e != alias { + format!("{e} as {alias}") + } else { + e + } + }; + write!(f, "AggregateExec: mode={:?}", self.mode)?; let g: Vec = if self.group_by.is_single() { self.group_by @@ -801,6 +803,16 @@ impl DisplayAs for AggregateExec { } } DisplayFormatType::TreeRender => { + let format_expr_with_alias = + |(e, alias): &(Arc, String)| -> String { + let expr_sql = fmt_sql(e.as_ref()).to_string(); + if &expr_sql != alias { + format!("{expr_sql} as {alias}") + } else { + expr_sql + } + }; + let g: Vec = if self.group_by.is_single() { self.group_by .expr @@ -830,7 +842,6 @@ impl DisplayAs for AggregateExec { }) .collect() }; - // TODO: Implement `fmt_sql` for `AggregateFunctionExpr`. let a: Vec = self .aggr_expr .iter() @@ -840,7 +851,9 @@ impl DisplayAs for AggregateExec { if !g.is_empty() { writeln!(f, "group_by={}", g.join(", "))?; } - writeln!(f, "aggr={}", a.join(", "))?; + if !a.is_empty() { + writeln!(f, "aggr={}", a.join(", "))?; + } } } Ok(()) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index ce44abc68e7a..117cc5b7f0d5 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -202,53 +202,48 @@ physical_plan 02)│ AggregateExec │ 03)│ -------------------- │ 04)│ aggr: sum(bigint_col) │ -05)│ │ -06)│ group_by: │ -07)│ string_col@0 as string_col│ -08)│ │ -09)│ mode: │ -10)│ FinalPartitioned │ -11)└─────────────┬─────────────┘ -12)┌─────────────┴─────────────┐ -13)│ CoalesceBatchesExec │ -14)│ -------------------- │ -15)│ target_batch_size: │ -16)│ 8192 │ -17)└─────────────┬─────────────┘ -18)┌─────────────┴─────────────┐ -19)│ RepartitionExec │ -20)│ -------------------- │ -21)│ output_partition_count: │ -22)│ 4 │ -23)│ │ -24)│ partitioning_scheme: │ -25)│ Hash([string_col@0], 4) │ -26)└─────────────┬─────────────┘ -27)┌─────────────┴─────────────┐ -28)│ AggregateExec │ -29)│ -------------------- │ -30)│ aggr: sum(bigint_col) │ -31)│ │ -32)│ group_by: │ -33)│ string_col@0 as string_col│ -34)│ │ -35)│ mode: Partial │ -36)└─────────────┬─────────────┘ -37)┌─────────────┴─────────────┐ -38)│ RepartitionExec │ -39)│ -------------------- │ -40)│ output_partition_count: │ -41)│ 1 │ -42)│ │ -43)│ partitioning_scheme: │ -44)│ RoundRobinBatch(4) │ -45)└─────────────┬─────────────┘ -46)┌─────────────┴─────────────┐ -47)│ DataSourceExec │ -48)│ -------------------- │ -49)│ files: 1 │ -50)│ format: csv │ -51)└───────────────────────────┘ +05)│ group_by: string_col │ +06)│ │ +07)│ mode: │ +08)│ FinalPartitioned │ +09)└─────────────┬─────────────┘ +10)┌─────────────┴─────────────┐ +11)│ CoalesceBatchesExec │ +12)│ -------------------- │ +13)│ target_batch_size: │ +14)│ 8192 │ +15)└─────────────┬─────────────┘ +16)┌─────────────┴─────────────┐ +17)│ RepartitionExec │ +18)│ -------------------- │ +19)│ output_partition_count: │ +20)│ 4 │ +21)│ │ +22)│ partitioning_scheme: │ +23)│ Hash([string_col@0], 4) │ +24)└─────────────┬─────────────┘ +25)┌─────────────┴─────────────┐ +26)│ AggregateExec │ +27)│ -------------------- │ +28)│ aggr: sum(bigint_col) │ +29)│ group_by: string_col │ +30)│ mode: Partial │ +31)└─────────────┬─────────────┘ +32)┌─────────────┴─────────────┐ +33)│ RepartitionExec │ +34)│ -------------------- │ +35)│ output_partition_count: │ +36)│ 1 │ +37)│ │ +38)│ partitioning_scheme: │ +39)│ RoundRobinBatch(4) │ +40)└─────────────┬─────────────┘ +41)┌─────────────┴─────────────┐ +42)│ DataSourceExec │ +43)│ -------------------- │ +44)│ files: 1 │ +45)│ format: csv │ +46)└───────────────────────────┘ # Limit @@ -1476,68 +1471,59 @@ physical_plan 08)│ AggregateExec │ 09)│ -------------------- │ 10)│ aggr: count(1) │ -11)│ │ -12)│ group_by: │ -13)│ name@0 as name │ -14)│ │ -15)│ mode: │ -16)│ SinglePartitioned │ -17)└─────────────┬─────────────┘ -18)┌─────────────┴─────────────┐ -19)│ InterleaveExec ├──────────────┐ -20)└─────────────┬─────────────┘ │ -21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -22)│ AggregateExec ││ AggregateExec │ -23)│ -------------------- ││ -------------------- │ -24)│ aggr ││ aggr │ -25)│ ││ │ -26)│ group_by: ││ group_by: │ -27)│ name@0 as name ││ name@0 as name │ -28)│ ││ │ -29)│ mode: ││ mode: │ -30)│ FinalPartitioned ││ FinalPartitioned │ -31)└─────────────┬─────────────┘└─────────────┬─────────────┘ -32)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -33)│ CoalesceBatchesExec ││ CoalesceBatchesExec │ -34)│ -------------------- ││ -------------------- │ -35)│ target_batch_size: ││ target_batch_size: │ -36)│ 8192 ││ 8192 │ -37)└─────────────┬─────────────┘└─────────────┬─────────────┘ -38)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -39)│ RepartitionExec ││ RepartitionExec │ -40)│ -------------------- ││ -------------------- │ -41)│ output_partition_count: ││ output_partition_count: │ -42)│ 4 ││ 4 │ -43)│ ││ │ -44)│ partitioning_scheme: ││ partitioning_scheme: │ -45)│ Hash([name@0], 4) ││ Hash([name@0], 4) │ -46)└─────────────┬─────────────┘└─────────────┬─────────────┘ -47)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -48)│ RepartitionExec ││ RepartitionExec │ -49)│ -------------------- ││ -------------------- │ -50)│ output_partition_count: ││ output_partition_count: │ -51)│ 1 ││ 1 │ -52)│ ││ │ -53)│ partitioning_scheme: ││ partitioning_scheme: │ -54)│ RoundRobinBatch(4) ││ RoundRobinBatch(4) │ -55)└─────────────┬─────────────┘└─────────────┬─────────────┘ -56)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -57)│ AggregateExec ││ AggregateExec │ -58)│ -------------------- ││ -------------------- │ -59)│ aggr ││ aggr │ -60)│ ││ │ -61)│ group_by: ││ group_by: │ -62)│ name@0 as name ││ name@0 as name │ -63)│ ││ │ -64)│ mode: Partial ││ mode: Partial │ -65)└─────────────┬─────────────┘└─────────────┬─────────────┘ -66)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ -67)│ DataSourceExec ││ DataSourceExec │ -68)│ -------------------- ││ -------------------- │ -69)│ bytes: 1320 ││ bytes: 1312 │ -70)│ format: memory ││ format: memory │ -71)│ rows: 1 ││ rows: 1 │ -72)└───────────────────────────┘└───────────────────────────┘ +11)│ group_by: name │ +12)│ │ +13)│ mode: │ +14)│ SinglePartitioned │ +15)└─────────────┬─────────────┘ +16)┌─────────────┴─────────────┐ +17)│ InterleaveExec ├──────────────┐ +18)└─────────────┬─────────────┘ │ +19)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +20)│ AggregateExec ││ AggregateExec │ +21)│ -------------------- ││ -------------------- │ +22)│ group_by: name ││ group_by: name │ +23)│ ││ │ +24)│ mode: ││ mode: │ +25)│ FinalPartitioned ││ FinalPartitioned │ +26)└─────────────┬─────────────┘└─────────────┬─────────────┘ +27)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +28)│ CoalesceBatchesExec ││ CoalesceBatchesExec │ +29)│ -------------------- ││ -------------------- │ +30)│ target_batch_size: ││ target_batch_size: │ +31)│ 8192 ││ 8192 │ +32)└─────────────┬─────────────┘└─────────────┬─────────────┘ +33)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +34)│ RepartitionExec ││ RepartitionExec │ +35)│ -------------------- ││ -------------------- │ +36)│ output_partition_count: ││ output_partition_count: │ +37)│ 4 ││ 4 │ +38)│ ││ │ +39)│ partitioning_scheme: ││ partitioning_scheme: │ +40)│ Hash([name@0], 4) ││ Hash([name@0], 4) │ +41)└─────────────┬─────────────┘└─────────────┬─────────────┘ +42)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +43)│ RepartitionExec ││ RepartitionExec │ +44)│ -------------------- ││ -------------------- │ +45)│ output_partition_count: ││ output_partition_count: │ +46)│ 1 ││ 1 │ +47)│ ││ │ +48)│ partitioning_scheme: ││ partitioning_scheme: │ +49)│ RoundRobinBatch(4) ││ RoundRobinBatch(4) │ +50)└─────────────┬─────────────┘└─────────────┬─────────────┘ +51)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +52)│ AggregateExec ││ AggregateExec │ +53)│ -------------------- ││ -------------------- │ +54)│ group_by: name ││ group_by: name │ +55)│ mode: Partial ││ mode: Partial │ +56)└─────────────┬─────────────┘└─────────────┬─────────────┘ +57)┌─────────────┴─────────────┐┌─────────────┴─────────────┐ +58)│ DataSourceExec ││ DataSourceExec │ +59)│ -------------------- ││ -------------------- │ +60)│ bytes: 1320 ││ bytes: 1312 │ +61)│ format: memory ││ format: memory │ +62)│ rows: 1 ││ rows: 1 │ +63)└───────────────────────────┘└───────────────────────────┘ # cleanup statement ok From 3e8f1d0286de976c2357c5326d70913f99f5d78c Mon Sep 17 00:00:00 2001 From: irenjj Date: Mon, 17 Mar 2025 23:10:22 +0800 Subject: [PATCH 04/15] add doc --- datafusion/expr/src/expr.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 58f681cdb841..9261cd338724 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1147,6 +1147,18 @@ impl Expr { SchemaDisplay(self) } + /// The human readable name of the column (field) that this `Expr` will produce. + /// This name is primarily used in printing the explain tree output, (e.g. `EXPLAIN `), + /// providing a readable format to show how expressions are used in physical and logical plans. + /// + /// # Example + /// ``` + /// # use datafusion_expr::{col, lit}; + /// let expr = col("foo") + lit(42); + /// // For EXPLAIN output: + /// // "foo + 42" + /// println!("{}", expr.sql_name()); + /// ``` pub fn sql_name(&self) -> impl Display + '_ { SqlDisplay(self) } @@ -2784,6 +2796,7 @@ pub(crate) fn schema_name_from_exprs_comma_separated_without_space( schema_name_from_exprs_inner(exprs, ",") } +/// Get `sql_name` for Vector of expressions. pub(crate) fn sql_name_from_exprs_comma_separated_without_space( exprs: &[Expr], ) -> Result { @@ -2795,6 +2808,7 @@ pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result { schema_name_from_exprs_inner(exprs, ", ") } +/// Get `sql_name` for Vector of expressions. pub fn sql_name_from_exprs(exprs: &[Expr]) -> Result { sql_name_from_exprs_inner(exprs, ", ") } From 5929ef47fea8dc76b8aa9dc12288faf8aff69c87 Mon Sep 17 00:00:00 2001 From: irenjj Date: Mon, 17 Mar 2025 23:15:52 +0800 Subject: [PATCH 05/15] add table name --- datafusion/expr/src/expr.rs | 3 - .../sqllogictest/test_files/explain_tree.slt | 88 ++++++++++--------- 2 files changed, 46 insertions(+), 45 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 9261cd338724..1ffa699fee86 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2616,9 +2616,6 @@ struct SqlDisplay<'a>(&'a Expr); impl Display for SqlDisplay<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self.0 { - Expr::Column(col) => { - write!(f, "{}", col.name) - } Expr::Literal(scalar) => scalar.fmt(f), Expr::Placeholder(holder) => { write!(f, "{}", holder.id) diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 117cc5b7f0d5..7f246222e141 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -201,49 +201,53 @@ physical_plan 01)┌───────────────────────────┐ 02)│ AggregateExec │ 03)│ -------------------- │ -04)│ aggr: sum(bigint_col) │ -05)│ group_by: string_col │ +04)│ aggr: │ +05)│ sum(table1.bigint_col) │ 06)│ │ -07)│ mode: │ -08)│ FinalPartitioned │ -09)└─────────────┬─────────────┘ -10)┌─────────────┴─────────────┐ -11)│ CoalesceBatchesExec │ -12)│ -------------------- │ -13)│ target_batch_size: │ -14)│ 8192 │ -15)└─────────────┬─────────────┘ -16)┌─────────────┴─────────────┐ -17)│ RepartitionExec │ -18)│ -------------------- │ -19)│ output_partition_count: │ -20)│ 4 │ -21)│ │ -22)│ partitioning_scheme: │ -23)│ Hash([string_col@0], 4) │ -24)└─────────────┬─────────────┘ -25)┌─────────────┴─────────────┐ -26)│ AggregateExec │ -27)│ -------------------- │ -28)│ aggr: sum(bigint_col) │ -29)│ group_by: string_col │ -30)│ mode: Partial │ -31)└─────────────┬─────────────┘ -32)┌─────────────┴─────────────┐ -33)│ RepartitionExec │ -34)│ -------------------- │ -35)│ output_partition_count: │ -36)│ 1 │ -37)│ │ -38)│ partitioning_scheme: │ -39)│ RoundRobinBatch(4) │ -40)└─────────────┬─────────────┘ -41)┌─────────────┴─────────────┐ -42)│ DataSourceExec │ -43)│ -------------------- │ -44)│ files: 1 │ -45)│ format: csv │ -46)└───────────────────────────┘ +07)│ group_by: string_col │ +08)│ │ +09)│ mode: │ +10)│ FinalPartitioned │ +11)└─────────────┬─────────────┘ +12)┌─────────────┴─────────────┐ +13)│ CoalesceBatchesExec │ +14)│ -------------------- │ +15)│ target_batch_size: │ +16)│ 8192 │ +17)└─────────────┬─────────────┘ +18)┌─────────────┴─────────────┐ +19)│ RepartitionExec │ +20)│ -------------------- │ +21)│ output_partition_count: │ +22)│ 4 │ +23)│ │ +24)│ partitioning_scheme: │ +25)│ Hash([string_col@0], 4) │ +26)└─────────────┬─────────────┘ +27)┌─────────────┴─────────────┐ +28)│ AggregateExec │ +29)│ -------------------- │ +30)│ aggr: │ +31)│ sum(table1.bigint_col) │ +32)│ │ +33)│ group_by: string_col │ +34)│ mode: Partial │ +35)└─────────────┬─────────────┘ +36)┌─────────────┴─────────────┐ +37)│ RepartitionExec │ +38)│ -------------------- │ +39)│ output_partition_count: │ +40)│ 1 │ +41)│ │ +42)│ partitioning_scheme: │ +43)│ RoundRobinBatch(4) │ +44)└─────────────┬─────────────┘ +45)┌─────────────┴─────────────┐ +46)│ DataSourceExec │ +47)│ -------------------- │ +48)│ files: 1 │ +49)│ format: csv │ +50)└───────────────────────────┘ # Limit From f13a0caa0906e7271dca5fcdd9d76acecce036c9 Mon Sep 17 00:00:00 2001 From: irenjj Date: Tue, 18 Mar 2025 21:30:06 +0800 Subject: [PATCH 06/15] fix issues --- datafusion/expr/src/expr.rs | 100 +++++++++++++++++++++++------------- datafusion/expr/src/udaf.rs | 6 +-- 2 files changed, 67 insertions(+), 39 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 1ffa699fee86..716f971b7078 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2612,14 +2612,22 @@ impl Display for SchemaDisplay<'_> { } } -struct SqlDisplay<'a>(&'a Expr); +/// A helper struct for displaying an `Expr` as an SQL-like string. +/// +/// This struct provides a simple way to convert an `Expr` to a string representation that resembles SQL. +/// It is intended for explain display purpose rather than generating production-ready SQL. +/// If you need syntactically correct SQL for use in other systems, it is recommended to use `Unparser` from the `datafusion-sql` crate. +/// +/// # Note +/// +/// For generating syntactically correct SQL that can be fed to other systems, consider using `Unparser`. +/// For more details, see the [Unparser documentation](https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html).struct SqlDisplay<'a>(&'a Expr); +pub struct SqlDisplay<'a>(&'a Expr); + impl Display for SqlDisplay<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self.0 { Expr::Literal(scalar) => scalar.fmt(f), - Expr::Placeholder(holder) => { - write!(f, "{}", holder.id) - } Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), Expr::Between(Between { expr, @@ -2677,26 +2685,38 @@ impl Display for SqlDisplay<'_> { list, negated, }) => { - let inlist_name = sql_name_from_exprs(list)?; - - if *negated { - write!(f, "{} NOT IN {}", SqlDisplay(expr), inlist_name) - } else { - write!(f, "{} IN {}", SqlDisplay(expr), inlist_name) - } + write!( + f, + "{}{} IN {}", + SqlDisplay(expr), + if *negated { " NOT" } else { "" }, + ExprListDisplay::comma_separated(list.as_slice()) + ) } Expr::GroupingSet(GroupingSet::Cube(exprs)) => { - write!(f, "ROLLUP ({})", sql_name_from_exprs(exprs)?) + write!( + f, + "ROLLUP ({})", + ExprListDisplay::comma_separated(exprs.as_slice()) + ) } Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => { write!(f, "GROUPING SETS (")?; for exprs in lists_of_exprs.iter() { - write!(f, "({})", sql_name_from_exprs(exprs)?)?; + write!( + f, + "({})", + ExprListDisplay::comma_separated(exprs.as_slice()) + )?; } write!(f, ")") } Expr::GroupingSet(GroupingSet::Rollup(exprs)) => { - write!(f, "ROLLUP ({})", sql_name_from_exprs(exprs)?) + write!( + f, + "ROLLUP ({})", + ExprListDisplay::comma_separated(exprs.as_slice()) + ) } Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)), Expr::IsNotNull(expr) => { @@ -2777,7 +2797,7 @@ impl Display for SqlDisplay<'_> { } } } - _ => write!(f, "{}", self.0.schema_name()), + _ => write!(f, "{}", self.0), } } } @@ -2793,42 +2813,50 @@ pub(crate) fn schema_name_from_exprs_comma_separated_without_space( schema_name_from_exprs_inner(exprs, ",") } -/// Get `sql_name` for Vector of expressions. -pub(crate) fn sql_name_from_exprs_comma_separated_without_space( - exprs: &[Expr], -) -> Result { - sql_name_from_exprs_inner(exprs, ",") +/// Formats a list of `&Expr` with a custom separator using SQL display format +pub struct ExprListDisplay<'a> { + exprs: &'a [Expr], + sep: &'a str, } -/// Get schema_name for Vector of expressions -pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result { - schema_name_from_exprs_inner(exprs, ", ") -} +impl<'a> ExprListDisplay<'a> { + /// Create a new display struct with the given expressions and separator + pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self { + Self { exprs, sep } + } -/// Get `sql_name` for Vector of expressions. -pub fn sql_name_from_exprs(exprs: &[Expr]) -> Result { - sql_name_from_exprs_inner(exprs, ", ") + /// Create a new display struct with comma-space separator + pub fn comma_separated(exprs: &'a [Expr]) -> Self { + Self::new(exprs, ", ") + } } -fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result { - let mut s = String::new(); - for (i, e) in exprs.iter().enumerate() { - if i > 0 { - write!(&mut s, "{sep}")?; +impl Display for ExprListDisplay<'_> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let mut first = true; + for expr in self.exprs { + if !first { + write!(f, "{}", self.sep)?; + } + write!(f, "{}", SqlDisplay(expr))?; + first = false; } - write!(&mut s, "{}", SchemaDisplay(e))?; + Ok(()) } +} - Ok(s) +/// Get schema_name for Vector of expressions +pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result { + schema_name_from_exprs_inner(exprs, ", ") } -fn sql_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result { +fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result { let mut s = String::new(); for (i, e) in exprs.iter().enumerate() { if i > 0 { write!(&mut s, "{sep}")?; } - write!(&mut s, "{}", SqlDisplay(e))?; + write!(&mut s, "{}", SchemaDisplay(e))?; } Ok(s) diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index 3a394fbefa7c..f611d09894e0 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -31,8 +31,8 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use crate::expr::{ schema_name_from_exprs, schema_name_from_exprs_comma_separated_without_space, - schema_name_from_sorts, sql_name_from_exprs_comma_separated_without_space, - AggregateFunction, AggregateFunctionParams, WindowFunctionParams, + schema_name_from_sorts, AggregateFunction, AggregateFunctionParams, ExprListDisplay, + WindowFunctionParams, }; use crate::function::{ AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs, @@ -471,7 +471,7 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { "{}({}{})", self.name(), if *distinct { "DISTINCT " } else { "" }, - sql_name_from_exprs_comma_separated_without_space(args)? + ExprListDisplay::comma_separated(args.as_slice()) ))?; if let Some(null_treatment) = null_treatment { From 27202610b123f2328e2557a65ad7642bcfaa62ac Mon Sep 17 00:00:00 2001 From: irenjj Date: Tue, 18 Mar 2025 21:32:35 +0800 Subject: [PATCH 07/15] Update datafusion/physical-expr/src/aggregate.rs Co-authored-by: Andrew Lamb --- datafusion/physical-expr/src/aggregate.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index 4cfa2730adcc..20594df50be4 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -224,7 +224,8 @@ pub struct AggregateFunctionExpr { /// Output / return type of this aggregate data_type: DataType, name: String, - sql_name: String, + /// Simplified name for `tree` explain. + sql_name: String, schema: Schema, // The physical order by expressions ordering_req: LexOrdering, From 316ab9b4213f7898d2f59ee1ecfbe422f02d03c3 Mon Sep 17 00:00:00 2001 From: irenjj Date: Tue, 18 Mar 2025 21:32:49 +0800 Subject: [PATCH 08/15] Update datafusion/physical-expr/src/aggregate.rs Co-authored-by: Andrew Lamb --- datafusion/physical-expr/src/aggregate.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index 20594df50be4..b7977e463afe 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -223,6 +223,7 @@ pub struct AggregateFunctionExpr { args: Vec>, /// Output / return type of this aggregate data_type: DataType, + /// Output column name that this expression creates name: String, /// Simplified name for `tree` explain. sql_name: String, From 202a1d25d188742eccf8be779de265001aade7c0 Mon Sep 17 00:00:00 2001 From: irenjj Date: Tue, 18 Mar 2025 21:33:04 +0800 Subject: [PATCH 09/15] Update datafusion/physical-plan/src/aggregates/mod.rs Co-authored-by: Andrew Lamb --- datafusion/physical-plan/src/aggregates/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index b0b7402fb15d..4eba9ba71df0 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -805,7 +805,7 @@ impl DisplayAs for AggregateExec { DisplayFormatType::TreeRender => { let format_expr_with_alias = |(e, alias): &(Arc, String)| -> String { - let expr_sql = fmt_sql(e.as_ref()).to_string(); + let expr_sql = fmt_sql(e.as_ref()); if &expr_sql != alias { format!("{expr_sql} as {alias}") } else { From f0e0798da490cb9ca54112bf2c2e0d62a27dc0e6 Mon Sep 17 00:00:00 2001 From: irenjj Date: Tue, 18 Mar 2025 21:36:27 +0800 Subject: [PATCH 10/15] Update datafusion/physical-plan/src/aggregates/mod.rs Co-authored-by: Andrew Lamb From b7e985d1875a1998663785d8e2d02c1d4bd94a1c Mon Sep 17 00:00:00 2001 From: irenjj Date: Tue, 18 Mar 2025 21:42:36 +0800 Subject: [PATCH 11/15] fmt --- datafusion/physical-expr/src/aggregate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index b7977e463afe..b43ad53cc561 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -226,7 +226,7 @@ pub struct AggregateFunctionExpr { /// Output column name that this expression creates name: String, /// Simplified name for `tree` explain. - sql_name: String, + sql_name: String, schema: Schema, // The physical order by expressions ordering_req: LexOrdering, From acc0dc50ce5d589695fe9de19f1abfb7d9fdbcdd Mon Sep 17 00:00:00 2001 From: irenjj Date: Tue, 18 Mar 2025 22:05:43 +0800 Subject: [PATCH 12/15] fix build --- datafusion/physical-plan/src/aggregates/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 4eba9ba71df0..b0b7402fb15d 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -805,7 +805,7 @@ impl DisplayAs for AggregateExec { DisplayFormatType::TreeRender => { let format_expr_with_alias = |(e, alias): &(Arc, String)| -> String { - let expr_sql = fmt_sql(e.as_ref()); + let expr_sql = fmt_sql(e.as_ref()).to_string(); if &expr_sql != alias { format!("{expr_sql} as {alias}") } else { From af138200c2a631df20b9ebce85374b6c4827af67 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Mar 2025 14:47:50 -0400 Subject: [PATCH 13/15] improve docs --- datafusion/expr/src/expr.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 716f971b7078..91465752822d 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -104,6 +104,7 @@ use sqlparser::ast::{ /// // All literals are strongly typed in DataFusion. To make an `i64` 42: /// let expr = lit(42i64); /// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)))); +/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)))); /// // To make a (typed) NULL: /// let expr = Expr::Literal(ScalarValue::Int64(None)); /// // to make an (untyped) NULL (the optimizer will coerce this to the correct type): @@ -1147,9 +1148,17 @@ impl Expr { SchemaDisplay(self) } - /// The human readable name of the column (field) that this `Expr` will produce. - /// This name is primarily used in printing the explain tree output, (e.g. `EXPLAIN `), - /// providing a readable format to show how expressions are used in physical and logical plans. + /// Human readable formatting for this expression. + /// + /// This name is primarily used in printing the explain tree output, (e.g. + /// `EXPLAIN `), providing a readable format to show how expressions + /// are used in physical and logical plans. + /// + /// Note this format is intended for human consumption rather than SQL for + /// other systems. If you need SQL to pass to other systems, consider using + /// [`Unparser`]. + /// + /// [Unparser]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html /// /// # Example /// ``` @@ -2613,16 +2622,7 @@ impl Display for SchemaDisplay<'_> { } /// A helper struct for displaying an `Expr` as an SQL-like string. -/// -/// This struct provides a simple way to convert an `Expr` to a string representation that resembles SQL. -/// It is intended for explain display purpose rather than generating production-ready SQL. -/// If you need syntactically correct SQL for use in other systems, it is recommended to use `Unparser` from the `datafusion-sql` crate. -/// -/// # Note -/// -/// For generating syntactically correct SQL that can be fed to other systems, consider using `Unparser`. -/// For more details, see the [Unparser documentation](https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html).struct SqlDisplay<'a>(&'a Expr); -pub struct SqlDisplay<'a>(&'a Expr); +struct SqlDisplay<'a>(&'a Expr); impl Display for SqlDisplay<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { From e1bdb5fd0639d3e56f93501ed94c6e5f908fb156 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Mar 2025 15:07:37 -0400 Subject: [PATCH 14/15] Add documentation about sql_format --- datafusion/expr/src/expr.rs | 55 +++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 91465752822d..3a5410fcfb8f 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -64,6 +64,11 @@ use sqlparser::ast::{ /// /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt /// +/// # Printing Expressions +/// +/// You can print `Expr`s using the the `Debug` trait, `Display` trait, or +/// [`Self::sql_name`]. See the [examples](#examples-displaying-exprs) below. +/// /// # Schema Access /// /// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability @@ -76,9 +81,9 @@ use sqlparser::ast::{ /// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See /// the examples below and [`TreeNode`] for more information. /// -/// # Examples +/// # Examples: Creating and Using `Expr`s /// -/// ## Column references and literals +/// ## Column References and Literals /// /// [`Expr::Column`] refer to the values of columns and are often created with /// the [`col`] function. For example to create an expression `c1` referring to @@ -172,7 +177,51 @@ use sqlparser::ast::{ /// ]); /// ``` /// -/// # Visiting and Rewriting `Expr`s +/// # Examples: Displaying `Exprs` +/// +/// There are three ways to print an `Expr` depending on the usecase. +/// +/// ## Use `Debug` trait +/// +/// Following Rust conventions, the `Debug` implementation prints out the +/// internal structure of the expression, which is useful for debugging. +/// +/// ``` +/// # use datafusion_expr::{lit, col}; +/// let expr = col("c1") + lit(42); +/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42)) })"); +/// ``` +/// +/// ## Use the `Display` trait (detailed expression) +/// +/// The `Display` implementation prints out the expression in a SQL-like form, +/// but has additional details such as the data type of literals. This is useful +/// for understanding the expression in more detail and is used for the low level +/// [`ExplainFormat::Indent`] explain plan format. +/// +/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent +/// +/// ``` +/// # use datafusion_expr::{lit, col}; +/// let expr = col("c1") + lit(42); +/// assert_eq!(format!("{expr}"), "c1 + Int32(42)"); +/// ``` +/// +/// ## Use [`Self::sql_name`] (human readable) +/// +/// [`Self::sql_name`] prints out the expression in a SQL-like form, optimized +/// for human consumption by end users. It is used for the +/// [`ExplainFormat::Tree`] explain plan format. +/// +/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree +/// +/// ``` +/// # use datafusion_expr::{lit, col}; +/// let expr = col("c1") + lit(42); +/// assert_eq!(format!("{}", expr.sql_name()), "c1 + 42"); +/// ``` +/// +/// # Examples: Visiting and Rewriting `Expr`s /// /// Here is an example that finds all literals in an `Expr` tree: /// ``` From 76eb7f9e77d9943422ee1d35459beef2652d9564 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Mar 2025 15:22:36 -0400 Subject: [PATCH 15/15] Rename to `human_display` --- datafusion/core/src/physical_planner.rs | 10 +++--- datafusion/expr/src/expr.rs | 31 +++++++++++-------- datafusion/expr/src/udaf.rs | 12 +++++-- datafusion/physical-expr/src/aggregate.rs | 19 ++++++------ .../physical-plan/src/aggregates/mod.rs | 2 +- 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 35e876fe1fe2..bc3c1d0ac99b 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1588,7 +1588,7 @@ type AggregateExprWithOptionalArgs = ( pub fn create_aggregate_expr_with_name_and_maybe_filter( e: &Expr, name: Option, - sql_name: String, + human_displan: String, logical_input_schema: &DFSchema, physical_input_schema: &Schema, execution_props: &ExecutionProps, @@ -1643,7 +1643,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( .order_by(ordering_reqs) .schema(Arc::new(physical_input_schema.to_owned())) .alias(name) - .sql_name(sql_name) + .human_display(human_displan) .with_ignore_nulls(ignore_nulls) .with_distinct(*distinct) .build() @@ -1666,13 +1666,13 @@ pub fn create_aggregate_expr_and_maybe_filter( execution_props: &ExecutionProps, ) -> Result { // unpack (nested) aliased logical expressions, e.g. "sum(col) as total" - let (name, sql_name, e) = match e { + let (name, human_display, e) = match e { Expr::Alias(Alias { expr, name, .. }) => { (Some(name.clone()), String::default(), expr.as_ref()) } Expr::AggregateFunction(_) => ( Some(e.schema_name().to_string()), - e.sql_name().to_string(), + e.human_display().to_string(), e, ), _ => (None, String::default(), e), @@ -1681,7 +1681,7 @@ pub fn create_aggregate_expr_and_maybe_filter( create_aggregate_expr_with_name_and_maybe_filter( e, name, - sql_name, + human_display, logical_input_schema, physical_input_schema, execution_props, diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 3a5410fcfb8f..9f6855b69824 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -67,7 +67,11 @@ use sqlparser::ast::{ /// # Printing Expressions /// /// You can print `Expr`s using the the `Debug` trait, `Display` trait, or -/// [`Self::sql_name`]. See the [examples](#examples-displaying-exprs) below. +/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below. +/// +/// If you need SQL to pass to other systems, consider using [`Unparser`]. +/// +/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html /// /// # Schema Access /// @@ -207,18 +211,18 @@ use sqlparser::ast::{ /// assert_eq!(format!("{expr}"), "c1 + Int32(42)"); /// ``` /// -/// ## Use [`Self::sql_name`] (human readable) +/// ## Use [`Self::human_display`] (human readable) /// -/// [`Self::sql_name`] prints out the expression in a SQL-like form, optimized +/// [`Self::human_display`] prints out the expression in a SQL-like form, optimized /// for human consumption by end users. It is used for the /// [`ExplainFormat::Tree`] explain plan format. /// /// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree /// -/// ``` +///``` /// # use datafusion_expr::{lit, col}; /// let expr = col("c1") + lit(42); -/// assert_eq!(format!("{}", expr.sql_name()), "c1 + 42"); +/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42"); /// ``` /// /// # Examples: Visiting and Rewriting `Expr`s @@ -1197,17 +1201,18 @@ impl Expr { SchemaDisplay(self) } - /// Human readable formatting for this expression. + /// Human readable display formatting for this expression. /// - /// This name is primarily used in printing the explain tree output, (e.g. - /// `EXPLAIN `), providing a readable format to show how expressions - /// are used in physical and logical plans. + /// This function is primarily used in printing the explain tree output, + /// (e.g. `EXPLAIN FORMAT TREE `), providing a readable format to + /// show how expressions are used in physical and logical plans. See the + /// [`Expr`] for other ways to format expressions /// /// Note this format is intended for human consumption rather than SQL for /// other systems. If you need SQL to pass to other systems, consider using /// [`Unparser`]. /// - /// [Unparser]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html + /// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html /// /// # Example /// ``` @@ -1215,9 +1220,9 @@ impl Expr { /// let expr = col("foo") + lit(42); /// // For EXPLAIN output: /// // "foo + 42" - /// println!("{}", expr.sql_name()); + /// println!("{}", expr.human_display()); /// ``` - pub fn sql_name(&self) -> impl Display + '_ { + pub fn human_display(&self) -> impl Display + '_ { SqlDisplay(self) } @@ -2837,7 +2842,7 @@ impl Display for SqlDisplay<'_> { Ok(()) } Expr::AggregateFunction(AggregateFunction { func, params }) => { - match func.sql_name(params) { + match func.human_display(params) { Ok(name) => { write!(f, "{name}") } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index f611d09894e0..b75e8fd3cd3c 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -175,8 +175,11 @@ impl AggregateUDF { self.inner.schema_name(params) } - pub fn sql_name(&self, params: &AggregateFunctionParams) -> Result { - self.inner.sql_name(params) + /// Returns a human readable expression. + /// + /// See [`Expr::human_display`] for details. + pub fn human_display(&self, params: &AggregateFunctionParams) -> Result { + self.inner.human_display(params) } pub fn window_function_schema_name( @@ -456,7 +459,10 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { Ok(schema_name) } - fn sql_name(&self, params: &AggregateFunctionParams) -> Result { + /// Returns a human readable expression. + /// + /// See [`Expr::human_display`] for details. + fn human_display(&self, params: &AggregateFunctionParams) -> Result { let AggregateFunctionParams { args, distinct, diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index b43ad53cc561..ae3d9050fa62 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -65,7 +65,8 @@ pub struct AggregateExprBuilder { /// Physical expressions of the aggregate function args: Vec>, alias: Option, - sql_name: String, + /// A human readable name + human_display: String, /// Arrow Schema for the aggregate function schema: SchemaRef, /// The physical order by expressions @@ -84,7 +85,7 @@ impl AggregateExprBuilder { fun, args, alias: None, - sql_name: String::default(), + human_display: String::default(), schema: Arc::new(Schema::empty()), ordering_req: LexOrdering::default(), ignore_nulls: false, @@ -101,7 +102,7 @@ impl AggregateExprBuilder { fun, args, alias, - sql_name, + human_display, schema, ordering_req, ignore_nulls, @@ -151,7 +152,7 @@ impl AggregateExprBuilder { args, data_type, name, - sql_name, + human_display, schema: Arc::unwrap_or_clone(schema), ordering_req, ignore_nulls, @@ -168,8 +169,8 @@ impl AggregateExprBuilder { self } - pub fn sql_name(mut self, name: String) -> Self { - self.sql_name = name; + pub fn human_display(mut self, name: String) -> Self { + self.human_display = name; self } @@ -226,7 +227,7 @@ pub struct AggregateFunctionExpr { /// Output column name that this expression creates name: String, /// Simplified name for `tree` explain. - sql_name: String, + human_display: String, schema: Schema, // The physical order by expressions ordering_req: LexOrdering, @@ -258,8 +259,8 @@ impl AggregateFunctionExpr { } /// Simplified name for `tree` explain. - pub fn sql_name(&self) -> &str { - &self.sql_name + pub fn human_display(&self) -> &str { + &self.human_display } /// Return if the aggregation is distinct diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index b0b7402fb15d..84ba0fe3b630 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -845,7 +845,7 @@ impl DisplayAs for AggregateExec { let a: Vec = self .aggr_expr .iter() - .map(|agg| agg.sql_name().to_string()) + .map(|agg| agg.human_display().to_string()) .collect(); writeln!(f, "mode={:?}", self.mode)?; if !g.is_empty() {