From 0e95ab4ca69e2384fab2d2f2646901474950bb62 Mon Sep 17 00:00:00 2001 From: Xwg Date: Sun, 12 Nov 2023 20:06:01 +0800 Subject: [PATCH 1/9] feat(optimizer): Implement LIKE operator rule for query optimization The commit introduces a new rule for the optimization of LIKE operator in SQL queries. The LIKE operator expressions are rewritten to make use of binary operators such as GtEq and Lt in certain cases which enhances the performance of queries. Additionally, new tests for incremented character rule have been added, and `LikeRewrite` has been added to optimizer rules in the rule set. --- src/db.rs | 18 ++++- src/optimizer/rule/mod.rs | 5 +- src/optimizer/rule/simplification.rs | 112 +++++++++++++++++++++++++-- 3 files changed, 122 insertions(+), 13 deletions(-) diff --git a/src/db.rs b/src/db.rs index 9c05b919..3616c7ee 100644 --- a/src/db.rs +++ b/src/db.rs @@ -54,10 +54,10 @@ impl Database { /// Limit(1) /// Project(a,b) let source_plan = binder.bind(&stmts[0])?; - // println!("source_plan plan: {:#?}", source_plan); + //println!("source_plan plan: {:#?}", source_plan); let best_plan = Self::default_optimizer(source_plan).find_best()?; - // println!("best_plan plan: {:#?}", best_plan); + //println!("best_plan plan: {:#?}", best_plan); let transaction = RefCell::new(transaction); let mut stream = build(best_plan, &transaction); @@ -78,10 +78,10 @@ impl Database { .batch( "Simplify Filter".to_string(), HepBatchStrategy::fix_point_topdown(10), - vec![RuleImpl::SimplifyFilter, RuleImpl::ConstantCalculation], + vec![RuleImpl::LikeRewrite, RuleImpl::SimplifyFilter, RuleImpl::ConstantCalculation], ) .batch( - "Predicate Pushdown".to_string(), + "Predicate Pushown".to_string(), HepBatchStrategy::fix_point_topdown(10), vec![ RuleImpl::PushPredicateThroughJoin, @@ -206,6 +206,12 @@ mod test { let _ = kipsql .run("insert into t3 (a, b) values (4, 4444), (5, 5222), (6, 1.00)") .await?; + let _ = kipsql + .run("create table t4 (a int primary key, b varchar(100))") + .await?; + let _ = kipsql + .run("insert into t4 (a, b) values (1, 'abc'), (2, 'abdc'), (3, 'abcd'), (4, 'ddabc')") + .await?; println!("show tables:"); let tuples_show_tables = kipsql.run("show tables").await?; @@ -371,6 +377,10 @@ mod test { let tuples_decimal = kipsql.run("select * from t3").await?; println!("{}", create_table(&tuples_decimal)); + println!("like rewrite:"); + let tuples_like_rewrite = kipsql.run("select * from t4 where b like 'abc%'").await?; + println!("{}", create_table(&tuples_like_rewrite)); + Ok(()) } } diff --git a/src/optimizer/rule/mod.rs b/src/optimizer/rule/mod.rs index 1c9bbbed..969b6b31 100644 --- a/src/optimizer/rule/mod.rs +++ b/src/optimizer/rule/mod.rs @@ -9,7 +9,7 @@ use crate::optimizer::rule::pushdown_limit::{ }; use crate::optimizer::rule::pushdown_predicates::PushPredicateIntoScan; use crate::optimizer::rule::pushdown_predicates::PushPredicateThroughJoin; -use crate::optimizer::rule::simplification::ConstantCalculation; +use crate::optimizer::rule::simplification::{ConstantCalculation, LikeRewrite}; use crate::optimizer::rule::simplification::SimplifyFilter; use crate::optimizer::OptimizerError; @@ -37,6 +37,7 @@ pub enum RuleImpl { // Simplification SimplifyFilter, ConstantCalculation, + LikeRewrite, } impl Rule for RuleImpl { @@ -53,6 +54,7 @@ impl Rule for RuleImpl { RuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.pattern(), RuleImpl::SimplifyFilter => SimplifyFilter.pattern(), RuleImpl::ConstantCalculation => ConstantCalculation.pattern(), + RuleImpl::LikeRewrite =>LikeRewrite.pattern(), } } @@ -69,6 +71,7 @@ impl Rule for RuleImpl { RuleImpl::SimplifyFilter => SimplifyFilter.apply(node_id, graph), RuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.apply(node_id, graph), RuleImpl::ConstantCalculation => ConstantCalculation.apply(node_id, graph), + RuleImpl::LikeRewrite => LikeRewrite.apply(node_id, graph), } } } diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index 3f004451..884d0cc7 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -5,7 +5,15 @@ use crate::optimizer::OptimizerError; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; use lazy_static::lazy_static; +use crate::expression::{BinaryOperator, ScalarExpression}; +use crate::types::value::{DataValue, ValueRef}; lazy_static! { + static ref LIKE_REWRITE_RULE: Pattern = { + Pattern { + predicate: |op| matches!(op, Operator::Filter(_)), + children: PatternChildrenPredicate::None, + } + }; static ref CONSTANT_CALCULATION_RULE: Pattern = { Pattern { predicate: |_| true, @@ -109,6 +117,84 @@ impl Rule for SimplifyFilter { } } +pub struct LikeRewrite; + +impl Rule for LikeRewrite { + fn pattern(&self) -> &Pattern { + &LIKE_REWRITE_RULE + } + + fn apply(&self, node_id: HepNodeId, graph: &mut HepGraph) -> Result<(), OptimizerError> { + if let Operator::Filter(mut filter_op) = graph.operator(node_id).clone() { + // if is like expression + if let ScalarExpression::Binary { + op: BinaryOperator::Like, + left_expr, + right_expr, + ty, + } = &mut filter_op.predicate + { + // if left is column and right is constant + if let ScalarExpression::ColumnRef(_) = left_expr.as_ref() { + if let ScalarExpression::Constant(value) = right_expr.as_ref() { + match value.as_ref() { + DataValue::Utf8(val_str) => { + let mut value = val_str.clone().unwrap_or_else(|| "".to_string()); + + if value.ends_with('%') { + value.pop(); // remove '%' + if let Some(last_char) = value.clone().pop() { + if let Some(next_char) = increment_char(last_char) { + let mut new_value = value.clone(); + new_value.pop(); + new_value.push(next_char); + + let new_expr = ScalarExpression::Binary { + op: BinaryOperator::And, + left_expr: Box::new(ScalarExpression::Binary { + op: BinaryOperator::GtEq, + left_expr: left_expr.clone(), + right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8(Some(value))))), + ty: ty.clone(), + }), + right_expr: Box::new(ScalarExpression::Binary { + op: BinaryOperator::Lt, + left_expr: left_expr.clone(), + right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8(Some(new_value))))), + ty: ty.clone(), + }), + ty: ty.clone(), + }; + filter_op.predicate = new_expr; + } + } + } + } + _ => { + graph.version += 1; + return Ok(()); + } + } + } + } + } + graph.replace_node(node_id, Operator::Filter(filter_op)) + } + // mark changed to skip this rule batch + graph.version += 1; + Ok(()) + } +} + +fn increment_char(v: char) -> Option { + match v { + 'z' => None, + 'Z' => None, + _ => std::char::from_u32(v as u32 + 1), + } +} + + #[cfg(test)] mod test { use crate::binder::test::select_sql_run; @@ -126,6 +212,15 @@ mod test { use crate::types::LogicalType; use std::collections::Bound; use std::sync::Arc; + use crate::optimizer::rule::simplification::increment_char; + + + #[test] + fn test_increment_char() { + assert_eq!(increment_char('a'), Some('b')); + assert_eq!(increment_char('z'), None); + assert_eq!(increment_char('A'), Some('B')); + } #[tokio::test] async fn test_constant_calculation_omitted() -> Result<(), DatabaseError> { @@ -302,6 +397,7 @@ mod test { Ok(()) } + #[tokio::test] async fn test_simplify_filter_multiple_column() -> Result<(), DatabaseError> { // c1 + 1 < -1 => c1 < -2 @@ -343,7 +439,7 @@ mod test { cb_1_c1, Some(ConstantBinary::Scope { min: Bound::Unbounded, - max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))) + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))), }) ); @@ -353,7 +449,7 @@ mod test { cb_1_c2, Some(ConstantBinary::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), - max: Bound::Unbounded + max: Bound::Unbounded, }) ); @@ -363,7 +459,7 @@ mod test { cb_2_c1, Some(ConstantBinary::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(2)))), - max: Bound::Unbounded + max: Bound::Unbounded, }) ); @@ -373,7 +469,7 @@ mod test { cb_1_c1, Some(ConstantBinary::Scope { min: Bound::Unbounded, - max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))) + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-2)))), }) ); @@ -383,7 +479,7 @@ mod test { cb_3_c1, Some(ConstantBinary::Scope { min: Bound::Unbounded, - max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))) + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))), }) ); @@ -393,7 +489,7 @@ mod test { cb_3_c2, Some(ConstantBinary::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), - max: Bound::Unbounded + max: Bound::Unbounded, }) ); @@ -403,7 +499,7 @@ mod test { cb_4_c1, Some(ConstantBinary::Scope { min: Bound::Excluded(Arc::new(DataValue::Int32(Some(0)))), - max: Bound::Unbounded + max: Bound::Unbounded, }) ); @@ -413,7 +509,7 @@ mod test { cb_4_c2, Some(ConstantBinary::Scope { min: Bound::Unbounded, - max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))) + max: Bound::Excluded(Arc::new(DataValue::Int32(Some(-1)))), }) ); From e99978974d0917633e683078929d16ce72863257 Mon Sep 17 00:00:00 2001 From: Xwg Date: Sun, 12 Nov 2023 20:19:19 +0800 Subject: [PATCH 2/9] fmt --- src/db.rs | 6 +++++- src/optimizer/rule/mod.rs | 4 ++-- src/optimizer/rule/simplification.rs | 23 ++++++++++++++--------- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/db.rs b/src/db.rs index 3616c7ee..f43f3dd1 100644 --- a/src/db.rs +++ b/src/db.rs @@ -78,7 +78,11 @@ impl Database { .batch( "Simplify Filter".to_string(), HepBatchStrategy::fix_point_topdown(10), - vec![RuleImpl::LikeRewrite, RuleImpl::SimplifyFilter, RuleImpl::ConstantCalculation], + vec![ + RuleImpl::LikeRewrite, + RuleImpl::SimplifyFilter, + RuleImpl::ConstantCalculation, + ], ) .batch( "Predicate Pushown".to_string(), diff --git a/src/optimizer/rule/mod.rs b/src/optimizer/rule/mod.rs index 969b6b31..a908a460 100644 --- a/src/optimizer/rule/mod.rs +++ b/src/optimizer/rule/mod.rs @@ -9,8 +9,8 @@ use crate::optimizer::rule::pushdown_limit::{ }; use crate::optimizer::rule::pushdown_predicates::PushPredicateIntoScan; use crate::optimizer::rule::pushdown_predicates::PushPredicateThroughJoin; -use crate::optimizer::rule::simplification::{ConstantCalculation, LikeRewrite}; use crate::optimizer::rule::simplification::SimplifyFilter; +use crate::optimizer::rule::simplification::{ConstantCalculation, LikeRewrite}; use crate::optimizer::OptimizerError; mod column_pruning; @@ -54,7 +54,7 @@ impl Rule for RuleImpl { RuleImpl::PushPredicateIntoScan => PushPredicateIntoScan.pattern(), RuleImpl::SimplifyFilter => SimplifyFilter.pattern(), RuleImpl::ConstantCalculation => ConstantCalculation.pattern(), - RuleImpl::LikeRewrite =>LikeRewrite.pattern(), + RuleImpl::LikeRewrite => LikeRewrite.pattern(), } } diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index 884d0cc7..cc2db9d6 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -1,12 +1,12 @@ +use crate::expression::{BinaryOperator, ScalarExpression}; use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::Rule; use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; use crate::optimizer::OptimizerError; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; -use lazy_static::lazy_static; -use crate::expression::{BinaryOperator, ScalarExpression}; use crate::types::value::{DataValue, ValueRef}; +use lazy_static::lazy_static; lazy_static! { static ref LIKE_REWRITE_RULE: Pattern = { Pattern { @@ -154,13 +154,21 @@ impl Rule for LikeRewrite { left_expr: Box::new(ScalarExpression::Binary { op: BinaryOperator::GtEq, left_expr: left_expr.clone(), - right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8(Some(value))))), + right_expr: Box::new( + ScalarExpression::Constant(ValueRef::from( + DataValue::Utf8(Some(value)), + )), + ), ty: ty.clone(), }), right_expr: Box::new(ScalarExpression::Binary { op: BinaryOperator::Lt, left_expr: left_expr.clone(), - right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8(Some(new_value))))), + right_expr: Box::new( + ScalarExpression::Constant(ValueRef::from( + DataValue::Utf8(Some(new_value)), + )), + ), ty: ty.clone(), }), ty: ty.clone(), @@ -190,11 +198,10 @@ fn increment_char(v: char) -> Option { match v { 'z' => None, 'Z' => None, - _ => std::char::from_u32(v as u32 + 1), + _ => std::char::from_u32(v as u32 + 1), } } - #[cfg(test)] mod test { use crate::binder::test::select_sql_run; @@ -204,6 +211,7 @@ mod test { use crate::expression::{BinaryOperator, ScalarExpression, UnaryOperator}; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizer; + use crate::optimizer::rule::simplification::increment_char; use crate::optimizer::rule::RuleImpl; use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::Operator; @@ -212,8 +220,6 @@ mod test { use crate::types::LogicalType; use std::collections::Bound; use std::sync::Arc; - use crate::optimizer::rule::simplification::increment_char; - #[test] fn test_increment_char() { @@ -397,7 +403,6 @@ mod test { Ok(()) } - #[tokio::test] async fn test_simplify_filter_multiple_column() -> Result<(), DatabaseError> { // c1 + 1 < -1 => c1 < -2 From 5ce7a826ddd3df812b195e16f36a507889135975 Mon Sep 17 00:00:00 2001 From: Xwg Date: Tue, 5 Dec 2023 23:37:26 +0800 Subject: [PATCH 3/9] fix(rbo): Rewrite code logic --- src/db.rs | 2 +- src/optimizer/rule/simplification.rs | 110 +++++++++++++-------------- 2 files changed, 54 insertions(+), 58 deletions(-) diff --git a/src/db.rs b/src/db.rs index f43f3dd1..dcecb206 100644 --- a/src/db.rs +++ b/src/db.rs @@ -85,7 +85,7 @@ impl Database { ], ) .batch( - "Predicate Pushown".to_string(), + "Predicate Pushdown".to_string(), HepBatchStrategy::fix_point_topdown(10), vec![ RuleImpl::PushPredicateThroughJoin, diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index cc2db9d6..6b092cd4 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -7,6 +7,7 @@ use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; use crate::types::value::{DataValue, ValueRef}; use lazy_static::lazy_static; +use crate::types::LogicalType; lazy_static! { static ref LIKE_REWRITE_RULE: Pattern = { Pattern { @@ -134,72 +135,67 @@ impl Rule for LikeRewrite { ty, } = &mut filter_op.predicate { - // if left is column and right is constant - if let ScalarExpression::ColumnRef(_) = left_expr.as_ref() { - if let ScalarExpression::Constant(value) = right_expr.as_ref() { - match value.as_ref() { - DataValue::Utf8(val_str) => { - let mut value = val_str.clone().unwrap_or_else(|| "".to_string()); - - if value.ends_with('%') { - value.pop(); // remove '%' - if let Some(last_char) = value.clone().pop() { - if let Some(next_char) = increment_char(last_char) { - let mut new_value = value.clone(); - new_value.pop(); - new_value.push(next_char); - - let new_expr = ScalarExpression::Binary { - op: BinaryOperator::And, - left_expr: Box::new(ScalarExpression::Binary { - op: BinaryOperator::GtEq, - left_expr: left_expr.clone(), - right_expr: Box::new( - ScalarExpression::Constant(ValueRef::from( - DataValue::Utf8(Some(value)), - )), - ), - ty: ty.clone(), - }), - right_expr: Box::new(ScalarExpression::Binary { - op: BinaryOperator::Lt, - left_expr: left_expr.clone(), - right_expr: Box::new( - ScalarExpression::Constant(ValueRef::from( - DataValue::Utf8(Some(new_value)), - )), - ), - ty: ty.clone(), - }), - ty: ty.clone(), - }; - filter_op.predicate = new_expr; - } - } + if let ScalarExpression::Constant(value) = right_expr.as_ref() { + if let DataValue::Utf8(value_str) = (**value).clone() { + if let Some(value_str) = value_str.as_ref() { + if value_str.ends_with('%') { + let x = value_str.trim_end_matches('%'); + let mut new_value = increment_last_char(x); + if let Some(new_value) = new_value { + let new_expr = Self::create_new_expr(left_expr, ty, x.to_string(), new_value); + filter_op.predicate = new_expr; } } - _ => { - graph.version += 1; - return Ok(()); - } } } } } graph.replace_node(node_id, Operator::Filter(filter_op)) } - // mark changed to skip this rule batch - graph.version += 1; Ok(()) } } -fn increment_char(v: char) -> Option { - match v { - 'z' => None, - 'Z' => None, - _ => std::char::from_u32(v as u32 + 1), +impl LikeRewrite { + fn create_new_expr(left_expr: &mut Box, ty: &mut LogicalType, mut value: String, mut new_value: String) -> ScalarExpression { + let new_expr = ScalarExpression::Binary { + op: BinaryOperator::And, + left_expr: Box::new(ScalarExpression::Binary { + op: BinaryOperator::GtEq, + left_expr: left_expr.clone(), + right_expr: Box::new( + ScalarExpression::Constant(ValueRef::from( + DataValue::Utf8(Some(value)), + )), + ), + ty: ty.clone(), + }), + + right_expr: Box::new(ScalarExpression::Binary { + op: BinaryOperator::Lt, + left_expr: left_expr.clone(), + right_expr: Box::new( + ScalarExpression::Constant(ValueRef::from( + DataValue::Utf8(Some(new_value)), + )), + ), + ty: ty.clone(), + }), + ty: ty.clone(), + }; + new_expr + } +} + +fn increment_last_char(s: &str) -> Option { + let mut chars: Vec = s.chars().collect(); + for i in (0..chars.len()).rev() { + if let Some(next_char) = std::char::from_u32(chars[i] as u32 + 1) { + chars[i] = next_char; + return Some(chars.into_iter().collect()); + } } + None } #[cfg(test)] @@ -211,7 +207,7 @@ mod test { use crate::expression::{BinaryOperator, ScalarExpression, UnaryOperator}; use crate::optimizer::heuristic::batch::HepBatchStrategy; use crate::optimizer::heuristic::optimizer::HepOptimizer; - use crate::optimizer::rule::simplification::increment_char; + use crate::optimizer::rule::simplification::increment_last_char; use crate::optimizer::rule::RuleImpl; use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::Operator; @@ -223,9 +219,9 @@ mod test { #[test] fn test_increment_char() { - assert_eq!(increment_char('a'), Some('b')); - assert_eq!(increment_char('z'), None); - assert_eq!(increment_char('A'), Some('B')); + assert_eq!(increment_last_char("abc"), Some("abd".to_string())); + assert_eq!(increment_last_char("abz"), Some("ab{".to_string())); + assert_eq!(increment_last_char("ab}"), Some("ab~".to_string())); } #[tokio::test] From 696232c23e7dd4698e7d75b63aacb7b7b843f807 Mon Sep 17 00:00:00 2001 From: Xwg Date: Wed, 6 Dec 2023 00:34:43 +0800 Subject: [PATCH 4/9] fix(rbo): COMMIT --- src/optimizer/rule/simplification.rs | 71 +++++++++++++++++++++------- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index 6b092cd4..b64b3a1d 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -7,6 +7,7 @@ use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; use crate::types::value::{DataValue, ValueRef}; use lazy_static::lazy_static; +use crate::planner::operator::filter::FilterOperator; use crate::types::LogicalType; lazy_static! { static ref LIKE_REWRITE_RULE: Pattern = { @@ -127,26 +128,25 @@ impl Rule for LikeRewrite { fn apply(&self, node_id: HepNodeId, graph: &mut HepGraph) -> Result<(), OptimizerError> { if let Operator::Filter(mut filter_op) = graph.operator(node_id).clone() { - // if is like expression if let ScalarExpression::Binary { op: BinaryOperator::Like, - left_expr, - right_expr, + ref mut left_expr, + ref mut right_expr, ty, - } = &mut filter_op.predicate + } = filter_op.predicate.clone() { if let ScalarExpression::Constant(value) = right_expr.as_ref() { if let DataValue::Utf8(value_str) = (**value).clone() { - if let Some(value_str) = value_str.as_ref() { + value_str.map(|value_str| { if value_str.ends_with('%') { - let x = value_str.trim_end_matches('%'); - let mut new_value = increment_last_char(x); - if let Some(new_value) = new_value { - let new_expr = Self::create_new_expr(left_expr, ty, x.to_string(), new_value); - filter_op.predicate = new_expr; - } + let left_bound = value_str.trim_end_matches('%'); + let mut right_bound = increment_last_char(left_bound); + + right_bound.map(|rb| { + filter_op.predicate = Self::create_new_expr(&mut left_expr.clone(), ty, left_bound.to_string(), rb); + }); } - } + }); } } } @@ -157,7 +157,7 @@ impl Rule for LikeRewrite { } impl LikeRewrite { - fn create_new_expr(left_expr: &mut Box, ty: &mut LogicalType, mut value: String, mut new_value: String) -> ScalarExpression { + fn create_new_expr(left_expr: &mut Box, ty: LogicalType, left_bound: String, right_bound: String) -> ScalarExpression { let new_expr = ScalarExpression::Binary { op: BinaryOperator::And, left_expr: Box::new(ScalarExpression::Binary { @@ -165,10 +165,10 @@ impl LikeRewrite { left_expr: left_expr.clone(), right_expr: Box::new( ScalarExpression::Constant(ValueRef::from( - DataValue::Utf8(Some(value)), + DataValue::Utf8(Some(left_bound)), )), ), - ty: ty.clone(), + ty: ty, }), right_expr: Box::new(ScalarExpression::Binary { @@ -176,15 +176,50 @@ impl LikeRewrite { left_expr: left_expr.clone(), right_expr: Box::new( ScalarExpression::Constant(ValueRef::from( - DataValue::Utf8(Some(new_value)), + DataValue::Utf8(Some(right_bound)), )), ), - ty: ty.clone(), + ty, }), - ty: ty.clone(), + ty, }; new_expr } + + fn process_filter_operator(&self, filter_op: &mut FilterOperator) -> Result<(), OptimizerError> { + if let ScalarExpression::Binary { + op: BinaryOperator::Like, + left_expr, + right_expr, + ty, + } = &mut filter_op.predicate + { + self.process_like_expression(left_expr, right_expr, ty)?; + } + Ok(()) + } + + fn process_like_expression(&self, left_expr: &mut Box, right_expr: &mut Box, ty: &mut LogicalType) -> Result<(), OptimizerError> { + if let ScalarExpression::Constant(value) = right_expr.as_ref() { + if let DataValue::Utf8(value_str) = (**value).clone() { + if let Some(value_str) = value_str.as_ref() { + self.process_utf8_value(left_expr, ty, value_str)?; + } + } + } + Ok(()) + } + + fn process_utf8_value(&self, left_expr: &mut Box, ty: &mut LogicalType, value_str: &str) -> Result<(), OptimizerError> { + if value_str.ends_with('%') { + let x = value_str.trim_end_matches('%'); + if let Some(new_value) = increment_last_char(x) { + let new_expr = Self::create_new_expr(left_expr, *ty, x.to_string(), new_value); + *left_expr = Box::new(new_expr); + } + } + Ok(()) + } } fn increment_last_char(s: &str) -> Option { From 5b1d4ed34c274f38968a03b2d0102823f69e29d7 Mon Sep 17 00:00:00 2001 From: Xwg Date: Wed, 6 Dec 2023 00:38:44 +0800 Subject: [PATCH 5/9] fix(rbo): finish --- src/optimizer/rule/simplification.rs | 58 +++++++--------------------- 1 file changed, 13 insertions(+), 45 deletions(-) diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index b64b3a1d..2b5a115e 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -137,16 +137,7 @@ impl Rule for LikeRewrite { { if let ScalarExpression::Constant(value) = right_expr.as_ref() { if let DataValue::Utf8(value_str) = (**value).clone() { - value_str.map(|value_str| { - if value_str.ends_with('%') { - let left_bound = value_str.trim_end_matches('%'); - let mut right_bound = increment_last_char(left_bound); - - right_bound.map(|rb| { - filter_op.predicate = Self::create_new_expr(&mut left_expr.clone(), ty, left_bound.to_string(), rb); - }); - } - }); + Self::process_value_str(value_str, left_expr, ty, &mut filter_op); } } } @@ -157,6 +148,17 @@ impl Rule for LikeRewrite { } impl LikeRewrite { + fn process_value_str(value_str: Option, left_expr: &mut Box, ty: LogicalType, filter_op: &mut FilterOperator) { + value_str.map(|value_str| { + if value_str.ends_with('%') { + let left_bound = value_str.trim_end_matches('%'); + let right_bound = increment_last_char(left_bound); + right_bound.map(|rb| { + filter_op.predicate = Self::create_new_expr(&mut left_expr.clone(), ty, left_bound.to_string(), rb); + }); + } + }); + } fn create_new_expr(left_expr: &mut Box, ty: LogicalType, left_bound: String, right_bound: String) -> ScalarExpression { let new_expr = ScalarExpression::Binary { op: BinaryOperator::And, @@ -185,41 +187,6 @@ impl LikeRewrite { }; new_expr } - - fn process_filter_operator(&self, filter_op: &mut FilterOperator) -> Result<(), OptimizerError> { - if let ScalarExpression::Binary { - op: BinaryOperator::Like, - left_expr, - right_expr, - ty, - } = &mut filter_op.predicate - { - self.process_like_expression(left_expr, right_expr, ty)?; - } - Ok(()) - } - - fn process_like_expression(&self, left_expr: &mut Box, right_expr: &mut Box, ty: &mut LogicalType) -> Result<(), OptimizerError> { - if let ScalarExpression::Constant(value) = right_expr.as_ref() { - if let DataValue::Utf8(value_str) = (**value).clone() { - if let Some(value_str) = value_str.as_ref() { - self.process_utf8_value(left_expr, ty, value_str)?; - } - } - } - Ok(()) - } - - fn process_utf8_value(&self, left_expr: &mut Box, ty: &mut LogicalType, value_str: &str) -> Result<(), OptimizerError> { - if value_str.ends_with('%') { - let x = value_str.trim_end_matches('%'); - if let Some(new_value) = increment_last_char(x) { - let new_expr = Self::create_new_expr(left_expr, *ty, x.to_string(), new_value); - *left_expr = Box::new(new_expr); - } - } - Ok(()) - } } fn increment_last_char(s: &str) -> Option { @@ -233,6 +200,7 @@ fn increment_last_char(s: &str) -> Option { None } + #[cfg(test)] mod test { use crate::binder::test::select_sql_run; From 74e622c6bc2ea1c936c7c397a7f6edae301aca6f Mon Sep 17 00:00:00 2001 From: Xwg Date: Wed, 6 Dec 2023 00:42:04 +0800 Subject: [PATCH 6/9] cargo fmt --- src/optimizer/rule/simplification.rs | 45 +++++++++++++++++----------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index 2b5a115e..acc62990 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -3,12 +3,12 @@ use crate::optimizer::core::pattern::{Pattern, PatternChildrenPredicate}; use crate::optimizer::core::rule::Rule; use crate::optimizer::heuristic::graph::{HepGraph, HepNodeId}; use crate::optimizer::OptimizerError; +use crate::planner::operator::filter::FilterOperator; use crate::planner::operator::join::JoinCondition; use crate::planner::operator::Operator; use crate::types::value::{DataValue, ValueRef}; -use lazy_static::lazy_static; -use crate::planner::operator::filter::FilterOperator; use crate::types::LogicalType; +use lazy_static::lazy_static; lazy_static! { static ref LIKE_REWRITE_RULE: Pattern = { Pattern { @@ -148,39 +148,51 @@ impl Rule for LikeRewrite { } impl LikeRewrite { - fn process_value_str(value_str: Option, left_expr: &mut Box, ty: LogicalType, filter_op: &mut FilterOperator) { + fn process_value_str( + value_str: Option, + left_expr: &mut Box, + ty: LogicalType, + filter_op: &mut FilterOperator, + ) { value_str.map(|value_str| { if value_str.ends_with('%') { let left_bound = value_str.trim_end_matches('%'); let right_bound = increment_last_char(left_bound); right_bound.map(|rb| { - filter_op.predicate = Self::create_new_expr(&mut left_expr.clone(), ty, left_bound.to_string(), rb); + filter_op.predicate = Self::create_new_expr( + &mut left_expr.clone(), + ty, + left_bound.to_string(), + rb, + ); }); } }); } - fn create_new_expr(left_expr: &mut Box, ty: LogicalType, left_bound: String, right_bound: String) -> ScalarExpression { + + fn create_new_expr( + left_expr: &mut Box, + ty: LogicalType, + left_bound: String, + right_bound: String, + ) -> ScalarExpression { let new_expr = ScalarExpression::Binary { op: BinaryOperator::And, left_expr: Box::new(ScalarExpression::Binary { op: BinaryOperator::GtEq, left_expr: left_expr.clone(), - right_expr: Box::new( - ScalarExpression::Constant(ValueRef::from( - DataValue::Utf8(Some(left_bound)), - )), - ), - ty: ty, + right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8( + Some(left_bound), + )))), + ty, }), right_expr: Box::new(ScalarExpression::Binary { op: BinaryOperator::Lt, left_expr: left_expr.clone(), - right_expr: Box::new( - ScalarExpression::Constant(ValueRef::from( - DataValue::Utf8(Some(right_bound)), - )), - ), + right_expr: Box::new(ScalarExpression::Constant(ValueRef::from(DataValue::Utf8( + Some(right_bound), + )))), ty, }), ty, @@ -200,7 +212,6 @@ fn increment_last_char(s: &str) -> Option { None } - #[cfg(test)] mod test { use crate::binder::test::select_sql_run; From 082b6355dda2cab571f27c7a64dd17babc4254d4 Mon Sep 17 00:00:00 2001 From: Xwg Date: Sat, 23 Dec 2023 19:09:02 +0800 Subject: [PATCH 7/9] Test: Add some e2e test and unit test --- src/optimizer/rule/simplification.rs | 73 ++++++++++++++++++++++++++++ tests/slt/filter.slt | 22 +++++++++ 2 files changed, 95 insertions(+) diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index acc62990..cdefa4e1 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -561,4 +561,77 @@ mod test { Ok(()) } + + #[tokio::test] + async fn test_like_rewrite() ->Result<(),DatabaseError> { + let plan = select_sql_run("select * from t1 where c1 like 'abc%%'").await?; + let best_plan = HepOptimizer::new(plan.clone()) + .batch( + "test_like_rewrite".to_string(), + HepBatchStrategy::once_topdown(), + vec![RuleImpl::LikeRewrite], + ) + .find_best()?; + + println!("{:#?}", best_plan); + assert_eq!(best_plan.childrens.len(), 1); + + match best_plan.operator { + Operator::Project(op)=>{ + assert_eq!(op.exprs.len(), 2); + } + _=>unreachable!() + } + + match &best_plan.childrens[0].operator { + Operator::Filter(op)=>{ + assert_eq!(op.predicate, ScalarExpression::Binary { + op: BinaryOperator::And, + left_expr: Box::new(ScalarExpression::Binary { + op: BinaryOperator::GtEq, + left_expr: Box::new(ScalarExpression::ColumnRef(Arc::new(ColumnCatalog { + summary: ColumnSummary { + id: Some(0), + name: "c1".to_string(), + table_name: Some(Arc::new("t1".to_string())), + }, + nullable: false, + desc: ColumnDesc { + column_datatype: LogicalType::Integer, + is_primary: true, + is_unique: false, + }, + ref_expr: None, + }))), + right_expr: Box::new(ScalarExpression::Constant(Arc::new(DataValue::Utf8(Some("abc".to_string()))))), + ty: LogicalType::Boolean, + }), + right_expr: Box::new(ScalarExpression::Binary { + op: BinaryOperator::Lt, + left_expr: Box::new(ScalarExpression::ColumnRef(Arc::new(ColumnCatalog { + summary: ColumnSummary { + id: Some(0), + name: "c1".to_string(), + table_name: Some(Arc::new("t1".to_string())), + }, + nullable: false, + desc: ColumnDesc { + column_datatype: LogicalType::Integer, + is_primary: true, + is_unique: false, + }, + ref_expr: None, + }))), + right_expr: Box::new(ScalarExpression::Constant(Arc::new(DataValue::Utf8(Some("abd".to_string()))))), + ty: LogicalType::Boolean, + }), + ty: LogicalType::Boolean, + }); + } + _=>unreachable!() + } + + Ok(()) + } + } diff --git a/tests/slt/filter.slt b/tests/slt/filter.slt index 45cb7fd9..e2decadf 100644 --- a/tests/slt/filter.slt +++ b/tests/slt/filter.slt @@ -112,6 +112,28 @@ select * from t1 where id not in (1, 2) 0 KipSQL 3 Cool! +query II +select * from t1 where v1 like 'Kip%%' +---- +0 KipSQL +1 KipDB +2 KipBlog + +query II +select * from t1 where v1 like 'KC%%' +---- + + +query II +select * from t1 where v1 like 'Co%%' +---- +3 Cool! + +query II +select * from t1 where v1 like 'Cool!%' +---- +3 Cool! + statement ok drop table t From c699ce32c109723d4920663e2650eb16e60c37ef Mon Sep 17 00:00:00 2001 From: Xwg Date: Sat, 23 Dec 2023 19:10:06 +0800 Subject: [PATCH 8/9] Fmt: cargo fmt --- src/optimizer/rule/simplification.rs | 104 +++++++++++++++------------ 1 file changed, 57 insertions(+), 47 deletions(-) diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index cdefa4e1..66157f00 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -563,7 +563,7 @@ mod test { } #[tokio::test] - async fn test_like_rewrite() ->Result<(),DatabaseError> { + async fn test_like_rewrite() -> Result<(), DatabaseError> { let plan = select_sql_run("select * from t1 where c1 like 'abc%%'").await?; let best_plan = HepOptimizer::new(plan.clone()) .batch( @@ -577,61 +577,71 @@ mod test { assert_eq!(best_plan.childrens.len(), 1); match best_plan.operator { - Operator::Project(op)=>{ + Operator::Project(op) => { assert_eq!(op.exprs.len(), 2); } - _=>unreachable!() + _ => unreachable!(), } match &best_plan.childrens[0].operator { - Operator::Filter(op)=>{ - assert_eq!(op.predicate, ScalarExpression::Binary { - op: BinaryOperator::And, - left_expr: Box::new(ScalarExpression::Binary { - op: BinaryOperator::GtEq, - left_expr: Box::new(ScalarExpression::ColumnRef(Arc::new(ColumnCatalog { - summary: ColumnSummary { - id: Some(0), - name: "c1".to_string(), - table_name: Some(Arc::new("t1".to_string())), - }, - nullable: false, - desc: ColumnDesc { - column_datatype: LogicalType::Integer, - is_primary: true, - is_unique: false, - }, - ref_expr: None, - }))), - right_expr: Box::new(ScalarExpression::Constant(Arc::new(DataValue::Utf8(Some("abc".to_string()))))), - ty: LogicalType::Boolean, - }), - right_expr: Box::new(ScalarExpression::Binary { - op: BinaryOperator::Lt, - left_expr: Box::new(ScalarExpression::ColumnRef(Arc::new(ColumnCatalog { - summary: ColumnSummary { - id: Some(0), - name: "c1".to_string(), - table_name: Some(Arc::new("t1".to_string())), - }, - nullable: false, - desc: ColumnDesc { - column_datatype: LogicalType::Integer, - is_primary: true, - is_unique: false, - }, - ref_expr: None, - }))), - right_expr: Box::new(ScalarExpression::Constant(Arc::new(DataValue::Utf8(Some("abd".to_string()))))), + Operator::Filter(op) => { + assert_eq!( + op.predicate, + ScalarExpression::Binary { + op: BinaryOperator::And, + left_expr: Box::new(ScalarExpression::Binary { + op: BinaryOperator::GtEq, + left_expr: Box::new(ScalarExpression::ColumnRef(Arc::new( + ColumnCatalog { + summary: ColumnSummary { + id: Some(0), + name: "c1".to_string(), + table_name: Some(Arc::new("t1".to_string())), + }, + nullable: false, + desc: ColumnDesc { + column_datatype: LogicalType::Integer, + is_primary: true, + is_unique: false, + }, + ref_expr: None, + } + ))), + right_expr: Box::new(ScalarExpression::Constant(Arc::new( + DataValue::Utf8(Some("abc".to_string())) + ))), + ty: LogicalType::Boolean, + }), + right_expr: Box::new(ScalarExpression::Binary { + op: BinaryOperator::Lt, + left_expr: Box::new(ScalarExpression::ColumnRef(Arc::new( + ColumnCatalog { + summary: ColumnSummary { + id: Some(0), + name: "c1".to_string(), + table_name: Some(Arc::new("t1".to_string())), + }, + nullable: false, + desc: ColumnDesc { + column_datatype: LogicalType::Integer, + is_primary: true, + is_unique: false, + }, + ref_expr: None, + } + ))), + right_expr: Box::new(ScalarExpression::Constant(Arc::new( + DataValue::Utf8(Some("abd".to_string())) + ))), + ty: LogicalType::Boolean, + }), ty: LogicalType::Boolean, - }), - ty: LogicalType::Boolean, - }); + } + ); } - _=>unreachable!() + _ => unreachable!(), } Ok(()) } - } From 143f1282f8b2ad356d7e4fac93a32e4b271190a6 Mon Sep 17 00:00:00 2001 From: Xwg Date: Sat, 23 Dec 2023 19:29:38 +0800 Subject: [PATCH 9/9] Fix: merge issue --- src/optimizer/rule/simplification.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/optimizer/rule/simplification.rs b/src/optimizer/rule/simplification.rs index 55fee618..c9961955 100644 --- a/src/optimizer/rule/simplification.rs +++ b/src/optimizer/rule/simplification.rs @@ -572,8 +572,6 @@ mod test { vec![RuleImpl::LikeRewrite], ) .find_best()?; - - println!("{:#?}", best_plan); assert_eq!(best_plan.childrens.len(), 1); match best_plan.operator { @@ -596,13 +594,13 @@ mod test { summary: ColumnSummary { id: Some(0), name: "c1".to_string(), - table_name: Some(Arc::new("t1".to_string())), }, nullable: false, desc: ColumnDesc { column_datatype: LogicalType::Integer, is_primary: true, is_unique: false, + default: None, }, ref_expr: None, } @@ -619,13 +617,13 @@ mod test { summary: ColumnSummary { id: Some(0), name: "c1".to_string(), - table_name: Some(Arc::new("t1".to_string())), }, nullable: false, desc: ColumnDesc { column_datatype: LogicalType::Integer, is_primary: true, is_unique: false, + default: None, }, ref_expr: None, }