Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MINOR: Move simplify_expression rule to datafusion-optimizer crate #2686

Merged
merged 8 commits into from
Jun 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/dev_pr/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ physical-expr:
- datafusion/physical-expr/**/*

optimizer:
- datafusion/core/src/optimizer/**/*
- datafusion/optimizer/**/*

core:
- datafusion/core/**/*
Expand Down
5 changes: 2 additions & 3 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ pub mod datasource;
pub mod error;
pub mod execution;
pub mod logical_plan;
pub mod optimizer;
pub mod physical_optimizer;
pub mod physical_plan;
pub mod prelude;
Expand All @@ -230,10 +229,10 @@ pub use parquet;
pub use datafusion_common as common;
pub use datafusion_data_access;
pub use datafusion_expr as logical_expr;
pub use datafusion_optimizer as optimizer;
pub use datafusion_physical_expr as physical_expr;
pub use datafusion_sql as sql;

pub use datafusion_row as row;
pub use datafusion_sql as sql;

#[cfg(feature = "jit")]
pub use datafusion_jit as jit;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/core/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
//! physical query plans and executed.

mod expr;
mod expr_simplier;
pub mod plan;
mod registry;
pub mod window_frames;
Expand All @@ -36,6 +35,7 @@ pub use datafusion_expr::{
},
ExprSchemable, Operator,
};
pub use datafusion_optimizer::expr_simplifier::{ExprSimplifiable, SimplifyInfo};
pub use expr::{
abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan,
avg, bit_length, btrim, call_fn, case, ceil, character_length, chr, coalesce, col,
Expand All @@ -54,7 +54,6 @@ pub use expr_rewriter::{
rewrite_sort_cols_by_aggs, unnormalize_col, unnormalize_cols, ExprRewritable,
ExprRewriter, RewriteRecursion,
};
pub use expr_simplier::{ExprSimplifiable, SimplifyInfo};
pub use plan::{provider_as_source, source_as_provider};
pub use plan::{
CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateMemoryTable,
Expand Down
28 changes: 0 additions & 28 deletions datafusion/core/src/optimizer/mod.rs

This file was deleted.

1 change: 1 addition & 0 deletions datafusion/optimizer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,6 @@ async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../common", version = "8.0.0" }
datafusion-expr = { path = "../expr", version = "8.0.0" }
datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" }
hashbrown = { version = "0.12", features = ["raw"] }
log = "^0.4"
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@

//! Expression simplifier

use super::Expr;
use super::ExprRewritable;
use crate::execution::context::ExecutionProps;
use crate::optimizer::simplify_expressions::{ConstEvaluator, Simplifier};
use crate::simplify_expressions::{ConstEvaluator, Simplifier};
use datafusion_common::Result;
use datafusion_expr::{expr_rewriter::ExprRewritable, Expr};
use datafusion_physical_expr::execution_props::ExecutionProps;

#[allow(rustdoc::private_intra_doc_links)]
/// The information necessary to apply algebraic simplification to an
Expand Down Expand Up @@ -54,9 +53,10 @@ impl ExprSimplifiable for Expr {
/// `b > 2`
///
/// ```
/// use datafusion::logical_plan::*;
/// use datafusion::error::Result;
/// use datafusion::execution::context::ExecutionProps;
/// use datafusion_expr::{col, lit, Expr};
/// use datafusion_common::Result;
/// use datafusion_physical_expr::execution_props::ExecutionProps;
/// use datafusion_optimizer::expr_simplifier::{SimplifyInfo, ExprSimplifiable};
///
/// /// Simple implementation that provides `Simplifier` the information it needs
/// #[derive(Default)]
Expand Down
2 changes: 2 additions & 0 deletions datafusion/optimizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
pub mod common_subexpr_eliminate;
pub mod eliminate_filter;
pub mod eliminate_limit;
pub mod expr_simplifier;
pub mod filter_push_down;
pub mod limit_push_down;
pub mod optimizer;
pub mod projection_push_down;
pub mod simplify_expressions;
pub mod single_distinct_to_groupby;
pub mod subquery_filter_to_join;
pub mod utils;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@

//! Simplify expressions optimizer rule

use crate::execution::context::ExecutionProps;
use crate::logical_plan::{ExprSimplifiable, SimplifyInfo};
use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::expr_simplifier::ExprSimplifiable;
use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule};
use arrow::array::new_null_array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
Expand All @@ -30,9 +29,9 @@ use datafusion_expr::{
lit,
logical_plan::LogicalPlan,
utils::from_plan,
Expr, ExprSchemable, Operator, Volatility,
ColumnarValue, Expr, ExprSchemable, Operator, Volatility,
};
use datafusion_physical_expr::create_physical_expr;
use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};

/// Provides simplification information based on schema and properties
pub(crate) struct SimplifyContext<'a, 'b> {
Expand Down Expand Up @@ -95,7 +94,7 @@ impl<'a, 'b> SimplifyInfo for SimplifyContext<'a, 'b> {
/// `Filter: b > 2`
///
#[derive(Default)]
pub(crate) struct SimplifyExpressions {}
pub struct SimplifyExpressions {}

/// returns true if `needle` is found in a chain of search_op
/// expressions. Such as: (A AND B) AND C
Expand Down Expand Up @@ -265,13 +264,13 @@ impl SimplifyExpressions {
/// --> `a`, which is handled by [`Simplifier`]
///
/// ```
/// # use datafusion::prelude::*;
/// # use datafusion::logical_plan::ExprRewritable;
/// # use datafusion::optimizer::simplify_expressions::ConstEvaluator;
/// # use datafusion::execution::context::ExecutionProps;
/// # use datafusion_expr::{col, lit};
/// # use datafusion_optimizer::simplify_expressions::ConstEvaluator;
/// # use datafusion_physical_expr::execution_props::ExecutionProps;
/// # use datafusion_expr::expr_rewriter::ExprRewritable;
///
/// let optimizer_config = ExecutionProps::new();
/// let mut const_evaluator = ConstEvaluator::new(&optimizer_config);
/// let execution_props = ExecutionProps::new();
/// let mut const_evaluator = ConstEvaluator::new(&execution_props);
///
/// // (1 + 2) + a
/// let expr = (lit(1) + lit(2)) + col("a");
Expand All @@ -295,7 +294,7 @@ pub struct ConstEvaluator<'a> {
/// descendants) so this Expr can be evaluated
can_evaluate: Vec<bool>,

optimizer_config: &'a ExecutionProps,
execution_props: &'a ExecutionProps,
input_schema: DFSchema,
input_batch: RecordBatch,
}
Expand Down Expand Up @@ -341,8 +340,8 @@ impl<'a> ExprRewriter for ConstEvaluator<'a> {
impl<'a> ConstEvaluator<'a> {
/// Create a new `ConstantEvaluator`. Session constants (such as
/// the time for `now()` are taken from the passed
/// `optimizer_config`.
pub fn new(optimizer_config: &'a ExecutionProps) -> Self {
/// `execution_props`.
pub fn new(execution_props: &'a ExecutionProps) -> Self {
let input_schema = DFSchema::empty();

// The dummy column name is unused and doesn't matter as only
Expand All @@ -357,7 +356,7 @@ impl<'a> ConstEvaluator<'a> {

Self {
can_evaluate: vec![],
optimizer_config,
execution_props,
input_schema,
input_batch,
}
Expand Down Expand Up @@ -423,11 +422,11 @@ impl<'a> ConstEvaluator<'a> {
&expr,
&self.input_schema,
&self.input_batch.schema(),
self.optimizer_config,
self.execution_props,
)?;
let col_val = phys_expr.evaluate(&self.input_batch)?;
match col_val {
crate::physical_plan::ColumnarValue::Array(a) => {
ColumnarValue::Array(a) => {
if a.len() != 1 {
Err(DataFusionError::Execution(format!(
"Could not evaluate the expressison, found a result of length {}",
Expand All @@ -437,7 +436,7 @@ impl<'a> ConstEvaluator<'a> {
Ok(ScalarValue::try_from_array(&a, 0)?)
}
}
crate::physical_plan::ColumnarValue::Scalar(s) => Ok(s),
ColumnarValue::Scalar(s) => Ok(s),
}
}
}
Expand Down Expand Up @@ -745,22 +744,42 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> {
}
}

/// A macro to assert that one string is contained within another with
/// a nice error message if they are not.
///
/// Usage: `assert_contains!(actual, expected)`
///
/// Is a macro so test error
/// messages are on the same line as the failure;
///
/// Both arguments must be convertable into Strings (Into<String>)
#[macro_export]
macro_rules! assert_contains {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is copy-and-pasted from the core crate

($ACTUAL: expr, $EXPECTED: expr) => {
let actual_value: String = $ACTUAL.into();
let expected_value: String = $EXPECTED.into();
assert!(
actual_value.contains(&expected_value),
"Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}",
expected_value,
actual_value
);
};
}

#[cfg(test)]
mod tests {
use super::*;
use crate::assert_contains;
use crate::logical_plan::{call_fn, create_udf};
use crate::physical_plan::functions::make_scalar_function;
use crate::physical_plan::udf::ScalarUDF;
use crate::test_util::scan_empty;
use arrow::array::{ArrayRef, Int32Array};
use chrono::{DateTime, TimeZone, Utc};
use datafusion_common::DFField;
use datafusion_expr::logical_plan::table_scan;
use datafusion_expr::{
and, binary_expr, col, lit, lit_timestamp_nano,
and, binary_expr, call_fn, col, create_udf, lit, lit_timestamp_nano,
logical_plan::builder::LogicalPlanBuilder, BuiltinScalarFunction, Expr,
ExprSchemable,
ExprSchemable, ScalarUDF,
};
use datafusion_physical_expr::functions::make_scalar_function;
use std::collections::HashMap;
use std::sync::Arc;

Expand Down Expand Up @@ -1192,12 +1211,12 @@ mod tests {
expected_expr: Expr,
date_time: &DateTime<Utc>,
) {
let optimizer_config = ExecutionProps {
let execution_props = ExecutionProps {
query_execution_start_time: *date_time,
var_providers: None,
};

let mut const_evaluator = ConstEvaluator::new(&optimizer_config);
let mut const_evaluator = ConstEvaluator::new(&execution_props);
let evaluated_expr = input_expr
.clone()
.rewrite(&mut const_evaluator)
Expand All @@ -1220,8 +1239,8 @@ mod tests {

fn simplify(expr: Expr) -> Expr {
let schema = expr_test_schema();
let optimizer_config = ExecutionProps::new();
let info = SimplifyContext::new(vec![&schema], &optimizer_config);
let execution_props = ExecutionProps::new();
let info = SimplifyContext::new(vec![&schema], &execution_props);
expr.simplify(&info).unwrap()
}

Expand Down Expand Up @@ -1522,7 +1541,7 @@ mod tests {
Field::new("c", DataType::Boolean, false),
Field::new("d", DataType::UInt32, false),
]);
scan_empty(Some("test"), &schema, None)
table_scan(Some("test"), &schema, None)
.expect("creating scan")
.build()
.expect("building plan")
Expand Down Expand Up @@ -1710,8 +1729,8 @@ mod tests {
.aggregate(
vec![col("a"), col("c")],
vec![
crate::logical_plan::max(col("b").eq(lit(true))),
crate::logical_plan::min(col("b")),
datafusion_expr::max(col("b").eq(lit(true))),
datafusion_expr::min(col("b")),
],
)
.unwrap()
Expand Down