Skip to content

Commit

Permalink
MINOR: Move simplify_expression rule to datafusion-optimizer crate (
Browse files Browse the repository at this point in the history
  • Loading branch information
andygrove authored Jun 5, 2022
1 parent a690a28 commit 9306534
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dev_pr/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ physical-expr:
- datafusion/physical-expr/**/*

optimizer:
- datafusion/core/src/optimizer/**/*
- datafusion/optimizer/**/*

core:
- datafusion/core/**/*
Expand Down
5 changes: 2 additions & 3 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,6 @@ pub mod datasource;
pub mod error;
pub mod execution;
pub mod logical_plan;
pub mod optimizer;
pub mod physical_optimizer;
pub mod physical_plan;
pub mod prelude;
Expand All @@ -230,10 +229,10 @@ pub use parquet;
pub use datafusion_common as common;
pub use datafusion_data_access;
pub use datafusion_expr as logical_expr;
pub use datafusion_optimizer as optimizer;
pub use datafusion_physical_expr as physical_expr;
pub use datafusion_sql as sql;

pub use datafusion_row as row;
pub use datafusion_sql as sql;

#[cfg(feature = "jit")]
pub use datafusion_jit as jit;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/core/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
//! physical query plans and executed.
mod expr;
mod expr_simplier;
pub mod plan;
mod registry;
pub mod window_frames;
Expand All @@ -36,6 +35,7 @@ pub use datafusion_expr::{
},
ExprSchemable, Operator,
};
pub use datafusion_optimizer::expr_simplifier::{ExprSimplifiable, SimplifyInfo};
pub use expr::{
abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan,
avg, bit_length, btrim, call_fn, case, ceil, character_length, chr, coalesce, col,
Expand All @@ -54,7 +54,6 @@ pub use expr_rewriter::{
rewrite_sort_cols_by_aggs, unnormalize_col, unnormalize_cols, ExprRewritable,
ExprRewriter, RewriteRecursion,
};
pub use expr_simplier::{ExprSimplifiable, SimplifyInfo};
pub use plan::{provider_as_source, source_as_provider};
pub use plan::{
CreateCatalog, CreateCatalogSchema, CreateExternalTable, CreateMemoryTable,
Expand Down
28 changes: 0 additions & 28 deletions datafusion/core/src/optimizer/mod.rs

This file was deleted.

1 change: 1 addition & 0 deletions datafusion/optimizer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,6 @@ async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../common", version = "8.0.0" }
datafusion-expr = { path = "../expr", version = "8.0.0" }
datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" }
hashbrown = { version = "0.12", features = ["raw"] }
log = "^0.4"
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@

//! Expression simplifier
use super::Expr;
use super::ExprRewritable;
use crate::execution::context::ExecutionProps;
use crate::optimizer::simplify_expressions::{ConstEvaluator, Simplifier};
use crate::simplify_expressions::{ConstEvaluator, Simplifier};
use datafusion_common::Result;
use datafusion_expr::{expr_rewriter::ExprRewritable, Expr};
use datafusion_physical_expr::execution_props::ExecutionProps;

#[allow(rustdoc::private_intra_doc_links)]
/// The information necessary to apply algebraic simplification to an
Expand Down Expand Up @@ -54,9 +53,10 @@ impl ExprSimplifiable for Expr {
/// `b > 2`
///
/// ```
/// use datafusion::logical_plan::*;
/// use datafusion::error::Result;
/// use datafusion::execution::context::ExecutionProps;
/// use datafusion_expr::{col, lit, Expr};
/// use datafusion_common::Result;
/// use datafusion_physical_expr::execution_props::ExecutionProps;
/// use datafusion_optimizer::expr_simplifier::{SimplifyInfo, ExprSimplifiable};
///
/// /// Simple implementation that provides `Simplifier` the information it needs
/// #[derive(Default)]
Expand Down
2 changes: 2 additions & 0 deletions datafusion/optimizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
pub mod common_subexpr_eliminate;
pub mod eliminate_filter;
pub mod eliminate_limit;
pub mod expr_simplifier;
pub mod filter_push_down;
pub mod limit_push_down;
pub mod optimizer;
pub mod projection_push_down;
pub mod simplify_expressions;
pub mod single_distinct_to_groupby;
pub mod subquery_filter_to_join;
pub mod utils;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@

//! Simplify expressions optimizer rule
use crate::execution::context::ExecutionProps;
use crate::logical_plan::{ExprSimplifiable, SimplifyInfo};
use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::expr_simplifier::ExprSimplifiable;
use crate::{expr_simplifier::SimplifyInfo, OptimizerConfig, OptimizerRule};
use arrow::array::new_null_array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
Expand All @@ -30,9 +29,9 @@ use datafusion_expr::{
lit,
logical_plan::LogicalPlan,
utils::from_plan,
Expr, ExprSchemable, Operator, Volatility,
ColumnarValue, Expr, ExprSchemable, Operator, Volatility,
};
use datafusion_physical_expr::create_physical_expr;
use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionProps};

/// Provides simplification information based on schema and properties
pub(crate) struct SimplifyContext<'a, 'b> {
Expand Down Expand Up @@ -95,7 +94,7 @@ impl<'a, 'b> SimplifyInfo for SimplifyContext<'a, 'b> {
/// `Filter: b > 2`
///
#[derive(Default)]
pub(crate) struct SimplifyExpressions {}
pub struct SimplifyExpressions {}

/// returns true if `needle` is found in a chain of search_op
/// expressions. Such as: (A AND B) AND C
Expand Down Expand Up @@ -265,13 +264,13 @@ impl SimplifyExpressions {
/// --> `a`, which is handled by [`Simplifier`]
///
/// ```
/// # use datafusion::prelude::*;
/// # use datafusion::logical_plan::ExprRewritable;
/// # use datafusion::optimizer::simplify_expressions::ConstEvaluator;
/// # use datafusion::execution::context::ExecutionProps;
/// # use datafusion_expr::{col, lit};
/// # use datafusion_optimizer::simplify_expressions::ConstEvaluator;
/// # use datafusion_physical_expr::execution_props::ExecutionProps;
/// # use datafusion_expr::expr_rewriter::ExprRewritable;
///
/// let optimizer_config = ExecutionProps::new();
/// let mut const_evaluator = ConstEvaluator::new(&optimizer_config);
/// let execution_props = ExecutionProps::new();
/// let mut const_evaluator = ConstEvaluator::new(&execution_props);
///
/// // (1 + 2) + a
/// let expr = (lit(1) + lit(2)) + col("a");
Expand All @@ -295,7 +294,7 @@ pub struct ConstEvaluator<'a> {
/// descendants) so this Expr can be evaluated
can_evaluate: Vec<bool>,

optimizer_config: &'a ExecutionProps,
execution_props: &'a ExecutionProps,
input_schema: DFSchema,
input_batch: RecordBatch,
}
Expand Down Expand Up @@ -341,8 +340,8 @@ impl<'a> ExprRewriter for ConstEvaluator<'a> {
impl<'a> ConstEvaluator<'a> {
/// Create a new `ConstantEvaluator`. Session constants (such as
/// the time for `now()` are taken from the passed
/// `optimizer_config`.
pub fn new(optimizer_config: &'a ExecutionProps) -> Self {
/// `execution_props`.
pub fn new(execution_props: &'a ExecutionProps) -> Self {
let input_schema = DFSchema::empty();

// The dummy column name is unused and doesn't matter as only
Expand All @@ -357,7 +356,7 @@ impl<'a> ConstEvaluator<'a> {

Self {
can_evaluate: vec![],
optimizer_config,
execution_props,
input_schema,
input_batch,
}
Expand Down Expand Up @@ -423,11 +422,11 @@ impl<'a> ConstEvaluator<'a> {
&expr,
&self.input_schema,
&self.input_batch.schema(),
self.optimizer_config,
self.execution_props,
)?;
let col_val = phys_expr.evaluate(&self.input_batch)?;
match col_val {
crate::physical_plan::ColumnarValue::Array(a) => {
ColumnarValue::Array(a) => {
if a.len() != 1 {
Err(DataFusionError::Execution(format!(
"Could not evaluate the expressison, found a result of length {}",
Expand All @@ -437,7 +436,7 @@ impl<'a> ConstEvaluator<'a> {
Ok(ScalarValue::try_from_array(&a, 0)?)
}
}
crate::physical_plan::ColumnarValue::Scalar(s) => Ok(s),
ColumnarValue::Scalar(s) => Ok(s),
}
}
}
Expand Down Expand Up @@ -745,22 +744,42 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> {
}
}

/// A macro to assert that one string is contained within another with
/// a nice error message if they are not.
///
/// Usage: `assert_contains!(actual, expected)`
///
/// Is a macro so test error
/// messages are on the same line as the failure;
///
/// Both arguments must be convertable into Strings (Into<String>)
#[macro_export]
macro_rules! assert_contains {
($ACTUAL: expr, $EXPECTED: expr) => {
let actual_value: String = $ACTUAL.into();
let expected_value: String = $EXPECTED.into();
assert!(
actual_value.contains(&expected_value),
"Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}",
expected_value,
actual_value
);
};
}

#[cfg(test)]
mod tests {
use super::*;
use crate::assert_contains;
use crate::logical_plan::{call_fn, create_udf};
use crate::physical_plan::functions::make_scalar_function;
use crate::physical_plan::udf::ScalarUDF;
use crate::test_util::scan_empty;
use arrow::array::{ArrayRef, Int32Array};
use chrono::{DateTime, TimeZone, Utc};
use datafusion_common::DFField;
use datafusion_expr::logical_plan::table_scan;
use datafusion_expr::{
and, binary_expr, col, lit, lit_timestamp_nano,
and, binary_expr, call_fn, col, create_udf, lit, lit_timestamp_nano,
logical_plan::builder::LogicalPlanBuilder, BuiltinScalarFunction, Expr,
ExprSchemable,
ExprSchemable, ScalarUDF,
};
use datafusion_physical_expr::functions::make_scalar_function;
use std::collections::HashMap;
use std::sync::Arc;

Expand Down Expand Up @@ -1192,12 +1211,12 @@ mod tests {
expected_expr: Expr,
date_time: &DateTime<Utc>,
) {
let optimizer_config = ExecutionProps {
let execution_props = ExecutionProps {
query_execution_start_time: *date_time,
var_providers: None,
};

let mut const_evaluator = ConstEvaluator::new(&optimizer_config);
let mut const_evaluator = ConstEvaluator::new(&execution_props);
let evaluated_expr = input_expr
.clone()
.rewrite(&mut const_evaluator)
Expand All @@ -1220,8 +1239,8 @@ mod tests {

fn simplify(expr: Expr) -> Expr {
let schema = expr_test_schema();
let optimizer_config = ExecutionProps::new();
let info = SimplifyContext::new(vec![&schema], &optimizer_config);
let execution_props = ExecutionProps::new();
let info = SimplifyContext::new(vec![&schema], &execution_props);
expr.simplify(&info).unwrap()
}

Expand Down Expand Up @@ -1522,7 +1541,7 @@ mod tests {
Field::new("c", DataType::Boolean, false),
Field::new("d", DataType::UInt32, false),
]);
scan_empty(Some("test"), &schema, None)
table_scan(Some("test"), &schema, None)
.expect("creating scan")
.build()
.expect("building plan")
Expand Down Expand Up @@ -1710,8 +1729,8 @@ mod tests {
.aggregate(
vec![col("a"), col("c")],
vec![
crate::logical_plan::max(col("b").eq(lit(true))),
crate::logical_plan::min(col("b")),
datafusion_expr::max(col("b").eq(lit(true))),
datafusion_expr::min(col("b")),
],
)
.unwrap()
Expand Down

0 comments on commit 9306534

Please sign in to comment.