diff --git a/benchmark/clickbench/internal/queries/04.sql b/benchmark/clickbench/internal/queries/04.sql new file mode 100644 index 000000000000..200be07a5f5a --- /dev/null +++ b/benchmark/clickbench/internal/queries/04.sql @@ -0,0 +1 @@ +select (number::string)::Int from numbers(100000000) ignore_result; diff --git a/src/query/expression/src/converts/arrow/from.rs b/src/query/expression/src/converts/arrow/from.rs index 332ed24d31c6..efd6f1dfe193 100644 --- a/src/query/expression/src/converts/arrow/from.rs +++ b/src/query/expression/src/converts/arrow/from.rs @@ -75,6 +75,14 @@ impl DataBlock { ) -> Result<(Self, DataSchema)> { assert_eq!(schema.num_fields(), batch.num_columns()); + if schema.fields().len() != batch.num_columns() { + return Err(ErrorCode::Internal(format!( + "conversion from RecordBatch to DataBlock failed, schema fields len: {}, RecordBatch columns len: {}", + schema.fields().len(), + batch.num_columns() + ))); + } + if batch.num_columns() == 0 { return Ok((DataBlock::new(vec![], batch.num_rows()), schema.clone())); } diff --git a/src/query/expression/src/utils/serialize.rs b/src/query/expression/src/utils/serialize.rs index 8d01ad1bcac5..7307ecc58bb8 100644 --- a/src/query/expression/src/utils/serialize.rs +++ b/src/query/expression/src/utils/serialize.rs @@ -13,11 +13,10 @@ // limitations under the License. use std::cmp::Ordering; +use std::result::Result; use chrono::Datelike; use chrono::NaiveDate; -use databend_common_exception::ErrorCode; -use databend_common_exception::Result; use crate::types::decimal::Decimal; use crate::types::decimal::DecimalSize; @@ -29,12 +28,13 @@ pub fn uniform_date(date: NaiveDate) -> i32 { date.num_days_from_ce() - EPOCH_DAYS_FROM_CE } +// Used in function, so we don't want to return ErrorCode with backtrace pub fn read_decimal_with_size( buf: &[u8], size: DecimalSize, exact: bool, rounding_mode: bool, -) -> Result<(T, usize)> { +) -> Result<(T, usize), String> { // Read one more digit for round let (n, d, e, n_read) = read_decimal::(buf, (size.precision + 1) as u32, size.scale as _, exact)?; @@ -91,7 +91,7 @@ pub fn read_decimal( max_digits: u32, mut max_scales: u32, exact: bool, -) -> Result<(T, u8, i32, usize)> { +) -> Result<(T, u8, i32, usize), String> { if buf.is_empty() { return Err(decimal_parse_error("empty")); } @@ -302,7 +302,7 @@ pub fn read_decimal( pub fn read_decimal_from_json( value: &serde_json::Value, size: DecimalSize, -) -> Result { +) -> Result { match value { serde_json::Value::Number(n) => { if n.is_i64() { @@ -323,14 +323,14 @@ pub fn read_decimal_from_json( let (n, _) = read_decimal_with_size::(s.as_bytes(), size, true, true)?; Ok(n) } - _ => Err(ErrorCode::from("Incorrect json value for decimal")), + _ => Err("Incorrect json value for decimal".to_string()), } } -fn decimal_parse_error(msg: &str) -> ErrorCode { - ErrorCode::BadArguments(format!("bad decimal literal: {msg}")) +fn decimal_parse_error(msg: &str) -> String { + format!("bad decimal literal: {msg}") } -fn decimal_overflow_error() -> ErrorCode { - ErrorCode::Overflow("Decimal overflow") +fn decimal_overflow_error() -> String { + "Decimal overflow".to_string() } diff --git a/src/query/functions/src/scalars/arithmetic.rs b/src/query/functions/src/scalars/arithmetic.rs index 6579799ed909..7d58f6978208 100644 --- a/src/query/functions/src/scalars/arithmetic.rs +++ b/src/query/functions/src/scalars/arithmetic.rs @@ -17,9 +17,11 @@ use std::ops::BitAnd; use std::ops::BitOr; use std::ops::BitXor; +use std::str::FromStr; use std::sync::Arc; use databend_common_arrow::arrow::bitmap::Bitmap; +use databend_common_expression::serialize::read_decimal_with_size; use databend_common_expression::types::decimal::DecimalDomain; use databend_common_expression::types::decimal::DecimalType; use databend_common_expression::types::nullable::NullableColumn; @@ -878,18 +880,48 @@ fn unary_minus_decimal( }) } +#[inline] +fn parse_number( + s: &str, + number_datatype: &NumberDataType, + rounding_mode: bool, +) -> Result::Err> +where + T: FromStr + num_traits::Num, +{ + match s.parse::() { + Ok(v) => Ok(v), + Err(e) => { + if !number_datatype.is_float() { + let decimal_pro = number_datatype.get_decimal_properties().unwrap(); + let (res, _) = + read_decimal_with_size::(s.as_bytes(), decimal_pro, true, rounding_mode) + .map_err(|_| e)?; + format!("{}", res).parse::() + } else { + Err(e) + } + } + } +} + fn register_string_to_number(registry: &mut FunctionRegistry) { for dest_type in ALL_NUMERICS_TYPES { with_number_mapped_type!(|DEST_TYPE| match dest_type { NumberDataType::DEST_TYPE => { let name = format!("to_{dest_type}").to_lowercase(); + let data_type = DEST_TYPE::data_type(); registry .register_passthrough_nullable_1_arg::, _, _>( &name, |_, _| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::>( move |val, output, ctx| { - match val.parse::() { + match parse_number::( + val, + &data_type, + ctx.func_ctx.rounding_mode, + ) { Ok(new_val) => output.push(new_val), Err(e) => { ctx.set_error(output.len(), e.to_string()); @@ -901,6 +933,7 @@ fn register_string_to_number(registry: &mut FunctionRegistry) { ); let name = format!("try_to_{dest_type}").to_lowercase(); + let data_type = DEST_TYPE::data_type(); registry .register_combine_nullable_1_arg::, _, _>( &name, @@ -908,8 +941,12 @@ fn register_string_to_number(registry: &mut FunctionRegistry) { vectorize_with_builder_1_arg::< StringType, NullableType>, - >(|val, output, _| { - if let Ok(new_val) = val.parse::() { + >(move |val, output, ctx| { + if let Ok(new_val) = parse_number::( + val, + &data_type, + ctx.func_ctx.rounding_mode, + ) { output.push(new_val); } else { output.push_null(); diff --git a/src/query/functions/src/scalars/decimal/cast.rs b/src/query/functions/src/scalars/decimal/cast.rs index dd4494bc0165..3547e2611d55 100644 --- a/src/query/functions/src/scalars/decimal/cast.rs +++ b/src/query/functions/src/scalars/decimal/cast.rs @@ -475,7 +475,7 @@ where { Ok((d, _)) => d, Err(e) => { - ctx.set_error(builder.len(), e.message()); + ctx.set_error(builder.len(), e); T::zero() } }; diff --git a/src/query/sql/src/evaluator/cse.rs b/src/query/sql/src/evaluator/cse.rs index e5a8d3b6d8a0..3ec7e9f48d79 100644 --- a/src/query/sql/src/evaluator/cse.rs +++ b/src/query/sql/src/evaluator/cse.rs @@ -37,10 +37,10 @@ pub fn apply_cse( count_expressions(expr, &mut cse_counter); } - let mut cse_candidates: Vec = cse_counter + let mut cse_candidates: Vec<&Expr> = cse_counter .iter() .filter(|(_, count)| **count > 1) - .map(|(expr, _)| expr.clone()) + .map(|(expr, _)| expr) .collect(); // Make sure the smaller expr goes firstly @@ -52,7 +52,7 @@ pub fn apply_cse( let mut cse_replacements = HashMap::new(); let candidates_nums = cse_candidates.len(); - for cse_candidate in &cse_candidates { + for cse_candidate in cse_candidates.iter() { let temp_var = format!("__temp_cse_{}", temp_var_counter); let temp_expr = Expr::ColumnRef { span: None, @@ -61,7 +61,7 @@ pub fn apply_cse( display_name: temp_var.clone(), }; - let mut expr_cloned = cse_candidate.clone(); + let mut expr_cloned = (*cse_candidate).clone(); perform_cse_replacement(&mut expr_cloned, &cse_replacements); debug!( diff --git a/src/query/sql/src/planner/binder/column_binding.rs b/src/query/sql/src/planner/binder/column_binding.rs index 4a110c6faf22..21e4562ebeb8 100644 --- a/src/query/sql/src/planner/binder/column_binding.rs +++ b/src/query/sql/src/planner/binder/column_binding.rs @@ -41,20 +41,47 @@ pub struct ColumnBinding { } const DUMMY_INDEX: usize = usize::MAX; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u64)] +pub enum DummyColumnType { + WindowFunction = 1, + AggregateFunction = 2, + Subquery = 3, + UDF = 4, + AsyncFunction = 5, + Other = 6, +} + +impl DummyColumnType { + fn type_identifier(&self) -> usize { + DUMMY_INDEX - (*self) as usize + } +} + impl ColumnBinding { - pub fn new_dummy_column(name: String, data_type: Box) -> Self { + pub fn new_dummy_column( + name: String, + data_type: Box, + dummy_type: DummyColumnType, + ) -> Self { + let index = dummy_type.type_identifier(); ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: name, - index: DUMMY_INDEX, + index, data_type, visibility: Visibility::Visible, virtual_computed_expr: None, } } + + pub fn is_dummy(&self) -> bool { + self.index >= DummyColumnType::Other.type_identifier() + } } impl ColumnIndex for ColumnBinding {} diff --git a/src/query/sql/src/planner/binder/mod.rs b/src/query/sql/src/planner/binder/mod.rs index d1d528de2c5c..3a7d53ea9ee7 100644 --- a/src/query/sql/src/planner/binder/mod.rs +++ b/src/query/sql/src/planner/binder/mod.rs @@ -66,6 +66,7 @@ pub use binder::Binder; pub use builders::*; pub use column_binding::ColumnBinding; pub use column_binding::ColumnBindingBuilder; +pub use column_binding::DummyColumnType; pub use copy_into_table::resolve_file_location; pub use copy_into_table::resolve_stage_location; pub use explain::ExplainConfig; diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs index 47ca97055bfe..211fb3e01e2a 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs @@ -17,16 +17,15 @@ use std::sync::Arc; use databend_common_exception::Result; use databend_common_expression::DataField; use databend_common_expression::DataSchemaRefExt; -use databend_common_expression::Scalar; use itertools::Itertools; use crate::optimizer::extract::Matcher; +use crate::optimizer::rule::constant::is_falsy; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; use crate::optimizer::RelExpr; use crate::optimizer::SExpr; -use crate::plans::ConstantExpr; use crate::plans::ConstantTableScan; use crate::plans::Filter; use crate::plans::Operator; @@ -73,18 +72,7 @@ impl Rule for RuleEliminateFilter { .collect::>(); // Rewrite false filter to be empty scan - if predicates.iter().any(|predicate| { - matches!( - predicate, - ScalarExpr::ConstantExpr(ConstantExpr { - value: Scalar::Boolean(false), - .. - }) | ScalarExpr::ConstantExpr(ConstantExpr { - value: Scalar::Null, - .. - }) - ) - }) { + if predicates.iter().any(is_falsy) { let output_columns = eval_scalar .derive_relational_prop(&RelExpr::with_s_expr(s_expr))? .output_columns diff --git a/src/query/sql/src/planner/optimizer/rule/rule.rs b/src/query/sql/src/planner/optimizer/rule/rule.rs index 7a4043e86ba4..4d2704cafd1c 100644 --- a/src/query/sql/src/planner/optimizer/rule/rule.rs +++ b/src/query/sql/src/planner/optimizer/rule/rule.rs @@ -26,14 +26,20 @@ use crate::optimizer::SExpr; pub static DEFAULT_REWRITE_RULES: LazyLock> = LazyLock::new(|| { vec![ - RuleID::NormalizeScalarFilter, - RuleID::EliminateFilter, RuleID::EliminateSort, - RuleID::MergeFilter, RuleID::MergeEvalScalar, + // Filter + RuleID::EliminateFilter, + RuleID::MergeFilter, + RuleID::NormalizeScalarFilter, RuleID::PushDownFilterUnion, RuleID::PushDownFilterAggregate, RuleID::PushDownFilterWindow, + RuleID::PushDownFilterSort, + RuleID::PushDownFilterEvalScalar, + // Limit + RuleID::PushDownFilterJoin, + RuleID::PushDownFilterProjectSet, RuleID::PushDownLimit, RuleID::PushDownLimitUnion, RuleID::PushDownLimitEvalScalar, @@ -42,10 +48,6 @@ pub static DEFAULT_REWRITE_RULES: LazyLock> = LazyLock::new(|| { RuleID::PushDownLimitAggregate, RuleID::PushDownLimitOuterJoin, RuleID::PushDownLimitScan, - RuleID::PushDownFilterSort, - RuleID::PushDownFilterEvalScalar, - RuleID::PushDownFilterJoin, - RuleID::PushDownFilterProjectSet, RuleID::SemiToInnerJoin, RuleID::FoldCountAggregate, RuleID::TryApplyAggIndex, diff --git a/src/query/sql/src/planner/semantic/lowering.rs b/src/query/sql/src/planner/semantic/lowering.rs index 88056be59c95..f2a1fab7a4e8 100644 --- a/src/query/sql/src/planner/semantic/lowering.rs +++ b/src/query/sql/src/planner/semantic/lowering.rs @@ -24,6 +24,7 @@ use databend_common_expression::Expr; use databend_common_expression::RawExpr; use databend_common_functions::BUILTIN_FUNCTIONS; +use crate::binder::DummyColumnType; use crate::plans::ScalarExpr; use crate::ColumnBinding; use crate::ColumnEntry; @@ -198,6 +199,7 @@ impl ScalarExpr { id: ColumnBinding::new_dummy_column( win.display_name.clone(), Box::new(win.func.return_type()), + DummyColumnType::WindowFunction, ), data_type: win.func.return_type(), display_name: win.display_name.clone(), @@ -207,6 +209,7 @@ impl ScalarExpr { id: ColumnBinding::new_dummy_column( agg.display_name.clone(), Box::new((*agg.return_type).clone()), + DummyColumnType::AggregateFunction, ), data_type: (*agg.return_type).clone(), display_name: agg.display_name.clone(), @@ -234,17 +237,19 @@ impl ScalarExpr { ScalarExpr::SubqueryExpr(subquery) => RawExpr::ColumnRef { span: subquery.span, id: ColumnBinding::new_dummy_column( - "DUMMY".to_string(), + "DUMMY_SUBQUERY".to_string(), Box::new(subquery.data_type()), + DummyColumnType::Subquery, ), data_type: subquery.data_type(), - display_name: "DUMMY".to_string(), + display_name: "DUMMY_SUBQUERY".to_string(), }, ScalarExpr::UDFCall(udf) => RawExpr::ColumnRef { span: None, id: ColumnBinding::new_dummy_column( udf.display_name.clone(), Box::new((*udf.return_type).clone()), + DummyColumnType::UDF, ), data_type: (*udf.return_type).clone(), display_name: udf.display_name.clone(), @@ -260,6 +265,7 @@ impl ScalarExpr { id: ColumnBinding::new_dummy_column( async_func.display_name.clone(), Box::new(async_func.return_type.as_ref().clone()), + DummyColumnType::AsyncFunction, ), data_type: async_func.return_type.as_ref().clone(), display_name: async_func.display_name.clone(), diff --git a/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table2_execption.test b/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table2_execption.test index 1b0ed943a1ea..b29de3a3dfab 100644 --- a/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table2_execption.test +++ b/tests/sqllogictests/suites/mode/cluster/distributed_copy_into_table2_execption.test @@ -77,8 +77,8 @@ select count(*) from products; statement ok truncate table products; -statement ok -set max_threads = 1; +#statement ok +#set max_threads = 1; #query T #explain pipeline copy into products from (select $1,$2,$4 from @s1 as t2) force = true; diff --git a/tests/sqllogictests/suites/query/functions/cast.test b/tests/sqllogictests/suites/query/functions/cast.test index 18261ba38127..ac1ca0626445 100644 --- a/tests/sqllogictests/suites/query/functions/cast.test +++ b/tests/sqllogictests/suites/query/functions/cast.test @@ -88,9 +88,9 @@ statement ok set numeric_cast_option = 'truncating' query T -select CAST(10249.5500000000000000 * POW(10, 2) AS UNSIGNED) +select CAST(10249.5500000000000000 * POW(10, 2) AS UNSIGNED), '29.55'::Int, '29.155'::Int ---- -1024954 +1024954 29 29 query T select to_uint64(1024954.98046875::double) @@ -101,9 +101,9 @@ statement ok set numeric_cast_option = 'rounding' query T -select CAST(10249.5500000000000000 * POW(10, 2) AS UNSIGNED) +select CAST(10249.5500000000000000 * POW(10, 2) AS UNSIGNED), '29.55'::Int, '29.155'::Int ---- -1024955 +1024955 30 29 query T select to_uint64(1024954.98046875::double)