Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1588,6 +1588,7 @@ type AggregateExprWithOptionalArgs = (
pub fn create_aggregate_expr_with_name_and_maybe_filter(
e: &Expr,
name: Option<String>,
human_displan: String,
logical_input_schema: &DFSchema,
physical_input_schema: &Schema,
execution_props: &ExecutionProps,
Expand Down Expand Up @@ -1642,6 +1643,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
.order_by(ordering_reqs)
.schema(Arc::new(physical_input_schema.to_owned()))
.alias(name)
.human_display(human_displan)
.with_ignore_nulls(ignore_nulls)
.with_distinct(*distinct)
.build()
Expand All @@ -1664,15 +1666,22 @@ pub fn create_aggregate_expr_and_maybe_filter(
execution_props: &ExecutionProps,
) -> Result<AggregateExprWithOptionalArgs> {
// unpack (nested) aliased logical expressions, e.g. "sum(col) as total"
let (name, e) = match e {
Expr::Alias(Alias { expr, name, .. }) => (Some(name.clone()), expr.as_ref()),
Expr::AggregateFunction(_) => (Some(e.schema_name().to_string()), e),
_ => (None, e),
let (name, human_display, e) = match e {
Expr::Alias(Alias { expr, name, .. }) => {
(Some(name.clone()), String::default(), expr.as_ref())
}
Expr::AggregateFunction(_) => (
Some(e.schema_name().to_string()),
e.human_display().to_string(),
e,
),
_ => (None, String::default(), e),
};

create_aggregate_expr_with_name_and_maybe_filter(
e,
name,
human_display,
logical_input_schema,
physical_input_schema,
execution_props,
Expand Down
298 changes: 295 additions & 3 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ use sqlparser::ast::{
///
/// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt
///
/// # Printing Expressions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this section and the one below to try and explain the uses of the different explain variants.

///
/// You can print `Expr`s using the the `Debug` trait, `Display` trait, or
/// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
///
/// If you need SQL to pass to other systems, consider using [`Unparser`].
///
/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
///
/// # Schema Access
///
/// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability
Expand All @@ -76,9 +85,9 @@ use sqlparser::ast::{
/// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See
/// the examples below and [`TreeNode`] for more information.
///
/// # Examples
/// # Examples: Creating and Using `Expr`s
///
/// ## Column references and literals
/// ## Column References and Literals
///
/// [`Expr::Column`] refer to the values of columns and are often created with
/// the [`col`] function. For example to create an expression `c1` referring to
Expand All @@ -104,6 +113,7 @@ use sqlparser::ast::{
/// // All literals are strongly typed in DataFusion. To make an `i64` 42:
/// let expr = lit(42i64);
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42))));
/// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42))));
/// // To make a (typed) NULL:
/// let expr = Expr::Literal(ScalarValue::Int64(None));
/// // to make an (untyped) NULL (the optimizer will coerce this to the correct type):
Expand Down Expand Up @@ -171,7 +181,51 @@ use sqlparser::ast::{
/// ]);
/// ```
///
/// # Visiting and Rewriting `Expr`s
/// # Examples: Displaying `Exprs`
///
/// There are three ways to print an `Expr` depending on the usecase.
///
/// ## Use `Debug` trait
///
/// Following Rust conventions, the `Debug` implementation prints out the
/// internal structure of the expression, which is useful for debugging.
///
/// ```
/// # use datafusion_expr::{lit, col};
/// let expr = col("c1") + lit(42);
/// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42)) })");
/// ```
///
/// ## Use the `Display` trait (detailed expression)
///
/// The `Display` implementation prints out the expression in a SQL-like form,
/// but has additional details such as the data type of literals. This is useful
/// for understanding the expression in more detail and is used for the low level
/// [`ExplainFormat::Indent`] explain plan format.
///
/// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent
///
/// ```
/// # use datafusion_expr::{lit, col};
/// let expr = col("c1") + lit(42);
/// assert_eq!(format!("{expr}"), "c1 + Int32(42)");
/// ```
///
/// ## Use [`Self::human_display`] (human readable)
///
/// [`Self::human_display`] prints out the expression in a SQL-like form, optimized
/// for human consumption by end users. It is used for the
/// [`ExplainFormat::Tree`] explain plan format.
///
/// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree
///
///```
/// # use datafusion_expr::{lit, col};
/// let expr = col("c1") + lit(42);
/// assert_eq!(format!("{}", expr.human_display()), "c1 + 42");
/// ```
///
/// # Examples: Visiting and Rewriting `Expr`s
///
/// Here is an example that finds all literals in an `Expr` tree:
/// ```
Expand Down Expand Up @@ -1147,6 +1201,31 @@ impl Expr {
SchemaDisplay(self)
}

/// Human readable display formatting for this expression.
///
/// This function is primarily used in printing the explain tree output,
/// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
/// show how expressions are used in physical and logical plans. See the
/// [`Expr`] for other ways to format expressions
///
/// Note this format is intended for human consumption rather than SQL for
/// other systems. If you need SQL to pass to other systems, consider using
/// [`Unparser`].
///
/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
///
/// # Example
/// ```
/// # use datafusion_expr::{col, lit};
/// let expr = col("foo") + lit(42);
/// // For EXPLAIN output:
/// // "foo + 42"
/// println!("{}", expr.human_display());
/// ```
pub fn human_display(&self) -> impl Display + '_ {
SqlDisplay(self)
}

/// Returns the qualifier and the schema name of this expression.
///
/// Used when the expression forms the output field of a certain plan.
Expand Down Expand Up @@ -2596,6 +2675,187 @@ impl Display for SchemaDisplay<'_> {
}
}

/// A helper struct for displaying an `Expr` as an SQL-like string.
struct SqlDisplay<'a>(&'a Expr);

impl Display for SqlDisplay<'_> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have code to convert from Expr to SQL, in https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html#method.expr_to_sql

However, datafusion-expr doesn't currently depend on the datafusion-sql crate which is probably a good thing

So I think we should add a note in Expr::sql_name that if the user's want syntactically correct SQL to feed to some other system, they should use the Unparser and leave a link to https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the main difference between Display? I found alias is different but others looks similar

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, it's redundant for most cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should take nested expr into consideration, like aggr(case aggr() when...)

fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self.0 {
Expr::Literal(scalar) => scalar.fmt(f),
Expr::Alias(Alias { name, .. }) => write!(f, "{name}"),
Expr::Between(Between {
expr,
negated,
low,
high,
}) => {
if *negated {
write!(
f,
"{} NOT BETWEEN {} AND {}",
SqlDisplay(expr),
SqlDisplay(low),
SqlDisplay(high),
)
} else {
write!(
f,
"{} BETWEEN {} AND {}",
SqlDisplay(expr),
SqlDisplay(low),
SqlDisplay(high),
)
}
}
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),)
}
Expr::Case(Case {
expr,
when_then_expr,
else_expr,
}) => {
write!(f, "CASE ")?;

if let Some(e) = expr {
write!(f, "{} ", SqlDisplay(e))?;
}

for (when, then) in when_then_expr {
write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?;
}

if let Some(e) = else_expr {
write!(f, "ELSE {} ", SqlDisplay(e))?;
}

write!(f, "END")
}
Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => {
write!(f, "{}", SqlDisplay(expr))
}
Expr::InList(InList {
expr,
list,
negated,
}) => {
write!(
f,
"{}{} IN {}",
SqlDisplay(expr),
if *negated { " NOT" } else { "" },
ExprListDisplay::comma_separated(list.as_slice())
)
}
Expr::GroupingSet(GroupingSet::Cube(exprs)) => {
write!(
f,
"ROLLUP ({})",
ExprListDisplay::comma_separated(exprs.as_slice())
)
}
Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => {
write!(f, "GROUPING SETS (")?;
for exprs in lists_of_exprs.iter() {
write!(
f,
"({})",
ExprListDisplay::comma_separated(exprs.as_slice())
)?;
}
write!(f, ")")
}
Expr::GroupingSet(GroupingSet::Rollup(exprs)) => {
write!(
f,
"ROLLUP ({})",
ExprListDisplay::comma_separated(exprs.as_slice())
)
}
Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)),
Expr::IsNotNull(expr) => {
write!(f, "{} IS NOT NULL", SqlDisplay(expr))
}
Expr::IsUnknown(expr) => {
write!(f, "{} IS UNKNOWN", SqlDisplay(expr))
}
Expr::IsNotUnknown(expr) => {
write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr))
}
Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)),
Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)),
Expr::IsNotTrue(expr) => {
write!(f, "{} IS NOT TRUE", SqlDisplay(expr))
}
Expr::IsNotFalse(expr) => {
write!(f, "{} IS NOT FALSE", SqlDisplay(expr))
}
Expr::Like(Like {
negated,
expr,
pattern,
escape_char,
case_insensitive,
}) => {
write!(
f,
"{} {}{} {}",
SqlDisplay(expr),
if *negated { "NOT " } else { "" },
if *case_insensitive { "ILIKE" } else { "LIKE" },
SqlDisplay(pattern),
)?;

if let Some(char) = escape_char {
write!(f, " CHAR '{char}'")?;
}

Ok(())
}
Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)),
Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)),
Expr::Unnest(Unnest { expr }) => {
write!(f, "UNNEST({})", SqlDisplay(expr))
}
Expr::SimilarTo(Like {
negated,
expr,
pattern,
escape_char,
..
}) => {
write!(
f,
"{} {} {}",
SqlDisplay(expr),
if *negated {
"NOT SIMILAR TO"
} else {
"SIMILAR TO"
},
SqlDisplay(pattern),
)?;
if let Some(char) = escape_char {
write!(f, " CHAR '{char}'")?;
}

Ok(())
}
Expr::AggregateFunction(AggregateFunction { func, params }) => {
match func.human_display(params) {
Ok(name) => {
write!(f, "{name}")
}
Err(e) => {
write!(f, "got error from schema_name {}", e)
}
}
}
_ => write!(f, "{}", self.0),
}
}
}

/// Get schema_name for Vector of expressions
///
/// Internal usage. Please call `schema_name_from_exprs` instead
Expand All @@ -2607,6 +2867,38 @@ pub(crate) fn schema_name_from_exprs_comma_separated_without_space(
schema_name_from_exprs_inner(exprs, ",")
}

/// Formats a list of `&Expr` with a custom separator using SQL display format
pub struct ExprListDisplay<'a> {
exprs: &'a [Expr],
sep: &'a str,
}

impl<'a> ExprListDisplay<'a> {
/// Create a new display struct with the given expressions and separator
pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self {
Self { exprs, sep }
}

/// Create a new display struct with comma-space separator
pub fn comma_separated(exprs: &'a [Expr]) -> Self {
Self::new(exprs, ", ")
}
}

impl Display for ExprListDisplay<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let mut first = true;
for expr in self.exprs {
if !first {
write!(f, "{}", self.sep)?;
}
write!(f, "{}", SqlDisplay(expr))?;
first = false;
}
Ok(())
}
}

/// Get schema_name for Vector of expressions
pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> {
schema_name_from_exprs_inner(exprs, ", ")
Expand Down
Loading