From 417da4e566a9cfecced30ea30e609c49553ca86f Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Mon, 8 Aug 2022 21:42:45 +0800 Subject: [PATCH 01/59] feat(query): add concat/filter kernel for chunk --- Cargo.lock | 1 + common/expression/Cargo.toml | 2 + common/expression/src/chunk.rs | 80 +- common/expression/src/lib.rs | 2 + common/expression/src/types.rs | 19 + common/expression/src/types/array.rs | 7 + common/expression/src/types/boolean.rs | 4 + common/expression/src/types/number.rs | 4 + common/expression/src/types/string.rs | 8 + common/expression/src/values.rs | 22 + common/expression/tests/it/main.rs | 1043 +----------------------- 11 files changed, 134 insertions(+), 1058 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bd75e5dfd4ed6..e0533192170dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1166,6 +1166,7 @@ dependencies = [ "comfy-table", "common-arrow", "common-ast", + "common-exception", "enum-as-inner", "goldenfile", "itertools", diff --git a/common/expression/Cargo.toml b/common/expression/Cargo.toml index 1c684b21c55c6..74105722d406e 100755 --- a/common/expression/Cargo.toml +++ b/common/expression/Cargo.toml @@ -12,6 +12,8 @@ test = false [dependencies] # In alphabetical order # Workspace dependencies common-arrow = { path = "../arrow" } +common-exception = { path = "../exception" } + # Github dependencies diff --git a/common/expression/src/chunk.rs b/common/expression/src/chunk.rs index e8396e9dbe124..31d2abb873431 100644 --- a/common/expression/src/chunk.rs +++ b/common/expression/src/chunk.rs @@ -12,29 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; + use crate::types::AnyType; use crate::Value; /// Chunk is a lightweight container for a group of columns. +#[derive(Clone)] pub struct Chunk { columns: Vec>, num_rows: usize, - chunk_info: Option>, } -/// ChunkInfo is extra information about a chunk, could be used during the pipeline transformation. 
-pub trait ChunkInfo {} - impl Chunk { + #[inline] pub fn new(columns: Vec>, num_rows: usize) -> Self { - Self::new_with_info(columns, num_rows, None) - } - - pub fn new_with_info( - columns: Vec>, - num_rows: usize, - chunk_info: Option>, - ) -> Self { debug_assert!( columns .iter() @@ -45,22 +37,76 @@ impl Chunk { .count() == 0 ); - Self { - columns, - num_rows, - chunk_info, - } + Self { columns, num_rows } } + #[inline] + pub fn empty() -> Self { + Chunk::new(vec![], 0) + } + + #[inline] pub fn columns(&self) -> &[Value] { &self.columns } + #[inline] pub fn num_rows(&self) -> usize { self.num_rows } + #[inline] pub fn num_columns(&self) -> usize { self.columns.len() } + + #[inline] + pub fn is_empty(&self) -> bool { + self.num_columns() == 0 || self.num_rows() == 0 + } + + #[inline] + pub fn memory_size(&self) -> usize { + self.columns() + .iter() + .map(|c| match c { + Value::Scalar(s) => std::mem::size_of_val(s) * self.num_rows, + Value::Column(c) => c.memory_size(), + }) + .sum() + } + + pub fn convert_to_full(&self) -> Self { + let mut columns = Vec::with_capacity(self.num_columns()); + for col in self.columns() { + match col { + Value::Scalar(s) => { + let builder = s.as_ref().repeat(self.num_rows); + let col = builder.build(); + columns.push(Value::Column(col)); + } + Value::Column(c) => columns.push(Value::Column(c.clone())), + } + } + Self { + columns, + num_rows: self.num_rows, + } + } + + pub fn slice(&self, range: Range) -> Self { + let mut columns = Vec::with_capacity(self.num_columns()); + for col in self.columns() { + match col { + Value::Scalar(s) => { + columns.push(Value::Scalar(s.clone())); + } + Value::Column(c) => columns.push(Value::Column(c.slice(range.clone()))), + } + } + Self { + columns, + num_rows: range.end - range.start + 1, + } + } } diff --git a/common/expression/src/lib.rs b/common/expression/src/lib.rs index c5790c921eda3..d0ea64e5a02e5 100755 --- a/common/expression/src/lib.rs +++ b/common/expression/src/lib.rs @@ -29,6 +29,7 @@ mod error; mod evaluator; mod expression; mod function; +mod kernels; mod property; pub mod type_check; pub mod types; @@ -40,5 +41,6 @@ pub use crate::error::*; pub use crate::evaluator::*; pub use crate::expression::*; pub use crate::function::*; +pub use crate::kernels::*; pub use crate::property::*; pub use crate::values::*; diff --git a/common/expression/src/types.rs b/common/expression/src/types.rs index 90b5e0436049a..2edd990f4daa3 100755 --- a/common/expression/src/types.rs +++ b/common/expression/src/types.rs @@ -98,6 +98,11 @@ pub trait ValueType: Debug + Clone + PartialEq + Sized + 'static { fn iter_column<'a>(col: &'a Self::Column) -> Self::ColumnIterator<'a>; fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder; + fn column_init_builder(col: &Self::Column, _capacity: usize) -> Self::ColumnBuilder { + let col = Self::slice_column(col, 0..0); + Self::column_to_builder(col) + } + fn builder_len(builder: &Self::ColumnBuilder) -> usize; fn push_item(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>); fn push_default(builder: &mut Self::ColumnBuilder); @@ -345,3 +350,17 @@ macro_rules! with_number_type { } }} } + +#[macro_export] +macro_rules! with_number_mapped_type { + ($t:tt, $($tail:tt)*) => {{ + match_template::match_template! 
{ + $t = [ + UInt8 => u8, UInt16 => u16, UInt32 => u32, UInt64 => u64, + Int8 => i8, Int16 => i16, Int32 => i32, Int64 => i64, + Float32 => f32, Float64 => f64 + ], + $($tail)* + } + }} +} diff --git a/common/expression/src/types/array.rs b/common/expression/src/types/array.rs index 07a4dbe50065a..f82b70d1bb68b 100755 --- a/common/expression/src/types/array.rs +++ b/common/expression/src/types/array.rs @@ -155,6 +155,13 @@ impl ArrayColumn { )) } + pub fn index_unchecked(&self, index: usize) -> T::Column { + T::slice_column( + &self.values, + (self.offsets[index] as usize)..(self.offsets[index + 1] as usize), + ) + } + pub fn slice(&self, range: Range) -> Self { let offsets = self .offsets diff --git a/common/expression/src/types/boolean.rs b/common/expression/src/types/boolean.rs index 27407db583f96..46efac465c354 100644 --- a/common/expression/src/types/boolean.rs +++ b/common/expression/src/types/boolean.rs @@ -97,6 +97,10 @@ impl ValueType for BooleanType { bitmap_into_mut(col) } + fn column_init_builder(_col: &Self::Column, capacity: usize) -> Self::ColumnBuilder { + MutableBitmap::with_capacity(capacity) + } + fn builder_len(builder: &Self::ColumnBuilder) -> usize { builder.len() } diff --git a/common/expression/src/types/number.rs b/common/expression/src/types/number.rs index cc75579ba16e6..357607bf10d73 100644 --- a/common/expression/src/types/number.rs +++ b/common/expression/src/types/number.rs @@ -109,6 +109,10 @@ impl ValueType for NumberType { buffer_into_mut(col) } + fn column_init_builder(_col: &Self::Column, capacity: usize) -> Self::ColumnBuilder { + Vec::with_capacity(capacity) + } + fn builder_len(builder: &Self::ColumnBuilder) -> usize { builder.len() } diff --git a/common/expression/src/types/string.rs b/common/expression/src/types/string.rs index 9d26e59dda034..d887d811edd1d 100644 --- a/common/expression/src/types/string.rs +++ b/common/expression/src/types/string.rs @@ -92,6 +92,10 @@ impl ValueType for StringType { StringColumnBuilder::from_column(col) } + fn column_init_builder(_col: &Self::Column, capacity: usize) -> Self::ColumnBuilder { + StringColumnBuilder::with_capacity(capacity, 0) + } + fn builder_len(builder: &Self::ColumnBuilder) -> usize { builder.len() } @@ -154,6 +158,10 @@ impl StringColumn { } } + pub fn index_unchecked(&self, index: usize) -> &[u8] { + &self.data[(self.offsets[index] as usize)..(self.offsets[index + 1] as usize)] + } + pub fn slice(&self, range: Range) -> Self { let offsets = self .offsets diff --git a/common/expression/src/values.rs b/common/expression/src/values.rs index 5735566ee0c09..78726f70552eb 100755 --- a/common/expression/src/values.rs +++ b/common/expression/src/values.rs @@ -885,6 +885,28 @@ impl Column { column } } + + pub fn memory_size(&self) -> usize { + match self { + Column::Null { .. } => std::mem::size_of::(), + Column::EmptyArray { .. 
} => std::mem::size_of::(), + Column::Int8(_) => self.len(), + Column::Int16(_) => self.len() * 2, + Column::Int32(_) => self.len() * 4, + Column::Int64(_) => self.len() * 8, + Column::UInt8(_) => self.len(), + Column::UInt16(_) => self.len() * 2, + Column::UInt32(_) => self.len() * 4, + Column::UInt64(_) => self.len() * 8, + Column::Float32(_) => self.len() * 4, + Column::Float64(_) => self.len() * 8, + Column::Boolean(c) => c.as_slice().0.len(), + Column::String(col) => col.data.len() + col.offsets.len() * 8, + Column::Array(col) => col.values.memory_size() + col.offsets.len() * 8, + Column::Nullable(c) => c.column.memory_size() + c.validity.as_slice().0.len(), + Column::Tuple { fields, .. } => fields.iter().map(|f| f.memory_size()).sum(), + } + } } impl ColumnBuilder { diff --git a/common/expression/tests/it/main.rs b/common/expression/tests/it/main.rs index 74a3b37c90753..9de31425a5226 100644 --- a/common/expression/tests/it/main.rs +++ b/common/expression/tests/it/main.rs @@ -15,1045 +15,6 @@ #![feature(box_patterns)] #![feature(try_blocks)] +mod expression; +mod kernel; mod parser; - -use std::io::Write; -use std::iter::once; -use std::sync::Arc; - -use comfy_table::Table; -use common_ast::DisplayError; -use common_expression::type_check; -use common_expression::types::array::ArrayColumn; -use common_expression::types::nullable::NullableColumn; -use common_expression::types::string::StringColumn; -use common_expression::types::ArrayType; -use common_expression::types::DataType; -use common_expression::types::*; -use common_expression::vectorize_2_arg; -use common_expression::vectorize_with_writer_2_arg; -use common_expression::BooleanDomain; -use common_expression::Chunk; -use common_expression::Column; -use common_expression::ColumnBuilder; -use common_expression::Domain; -use common_expression::DomainCalculator; -use common_expression::Evaluator; -use common_expression::FloatDomain; -use common_expression::Function; -use common_expression::FunctionContext; -use common_expression::FunctionProperty; -use common_expression::FunctionRegistry; -use common_expression::FunctionSignature; -use common_expression::IntDomain; -use common_expression::NullableDomain; -use common_expression::RemoteExpr; -use common_expression::Scalar; -use common_expression::ScalarRef; -use common_expression::Value; -use common_expression::ValueRef; -use goldenfile::Mint; -use parser::parse_raw_expr; - -#[test] -pub fn test_pass() { - let mut mint = Mint::new("tests/it/testdata"); - let mut file = mint.new_goldenfile("run-pass.txt").unwrap(); - - run_ast(&mut file, "true AND false", &[]); - run_ast(&mut file, "null AND false", &[]); - run_ast(&mut file, "plus(a, 10)", &[( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - )]); - run_ast(&mut file, "plus(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - run_ast(&mut file, "plus(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 
12].into()), - validity: vec![false, true, false].into(), - })), - ), - ("b", DataType::Null, Column::Null { len: 3 }), - ]); - - run_ast(&mut file, "minus(a, 10)", &[( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - )]); - - run_ast(&mut file, "minus(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt16(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::Int16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int16(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - - run_ast(&mut file, "minus(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ("b", DataType::Null, Column::Null { len: 3 }), - ]); - - run_ast(&mut file, "multiply(a, 10)", &[( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - )]); - - run_ast(&mut file, "multiply(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt16(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::Int16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int16(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - - run_ast(&mut file, "multiply(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt32(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::Int32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int32(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - - run_ast(&mut file, "multiply(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ("b", DataType::Null, Column::Null { len: 3 }), - ]); - - run_ast(&mut file, "divide(a, 10)", &[( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - )]); - - run_ast(&mut file, "divide(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt16(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::Int16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int16(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - - run_ast(&mut file, "divide(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: 
Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ("b", DataType::Null, Column::Null { len: 3 }), - ]); - - run_ast(&mut file, "avg(a, 10)", &[( - "a", - DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - )]); - - run_ast(&mut file, "avg(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt16(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::Int16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int16(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - - run_ast(&mut file, "avg(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::UInt32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt32(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::Int32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int32(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - - run_ast(&mut file, "avg(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::Float32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Float32(vec![10f32, 11f32, 12f32].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::Int32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int32(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - - run_ast(&mut file, "avg(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::Float32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Float32(vec![10f32, 11f32, 12f32].into()), - validity: vec![false, true, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::Float64)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Float64(vec![1f64, 2f64, 3f64].into()), - validity: vec![false, true, true].into(), - })), - ), - ]); - - run_ast(&mut file, "multiply(a, b)", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::Int8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), - ), - ("b", DataType::Null, Column::Null { len: 3 }), - ]); - - run_ast(&mut file, "NOT a", &[( - "a", - DataType::Nullable(Box::new(DataType::Boolean)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Boolean(vec![true, false, true].into()), - validity: vec![false, true, false].into(), - })), - )]); - - run_ast(&mut file, "NOT a", &[("a", DataType::Null, Column::Null { - len: 5, - })]); - run_ast(&mut file, "least(10, CAST(20 as Int8), 30, 40)", &[]); - run_ast(&mut file, "create_tuple(null, true)", &[]); - run_ast(&mut file, "get_tuple(1)(create_tuple(a, b))", &[ - ( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::String)), - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "abcde".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2, 3, 4, 5].into(), - }), - validity: vec![true, true, false, false, false].into(), - })), - ), - ]); - run_ast(&mut 
file, "get_tuple(1)(create_tuple(a, b))", &[ - ( - "a", - DataType::Nullable(Box::new(DataType::Boolean)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Boolean(vec![false; 5].into()), - validity: vec![true, true, false, false, false].into(), - })), - ), - ( - "b", - DataType::Nullable(Box::new(DataType::String)), - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "abcde".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2, 3, 4, 5].into(), - }), - validity: vec![true, true, false, false, false].into(), - })), - ), - ]); - run_ast(&mut file, "create_array()", &[]); - run_ast(&mut file, "create_array(null, true)", &[]); - run_ast(&mut file, "create_array(a, b)", &[ - ( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), - ), - ( - "b", - DataType::Int16, - Column::Int16(vec![5, 6, 7, 8, 9].into()), - ), - ]); - run_ast( - &mut file, - "create_array(create_array(a, b), null, null)", - &[ - ( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), - ), - ( - "b", - DataType::Int16, - Column::Int16(vec![5, 6, 7, 8, 9].into()), - ), - ], - ); - run_ast(&mut file, "get(a, b)", &[ - ( - "a", - DataType::Array(Box::new(DataType::Int16)), - Column::Array(Box::new(ArrayColumn { - values: Column::Int16((0..100).collect()), - offsets: vec![0, 20, 40, 60, 80, 100].into(), - })), - ), - ( - "b", - DataType::UInt8, - Column::UInt8(vec![0, 1, 2, 3, 4].into()), - ), - ]); - run_ast(&mut file, "get(a, b)", &[ - ( - "a", - DataType::Array(Box::new(DataType::Array(Box::new(DataType::Int16)))), - Column::Array(Box::new(ArrayColumn { - values: Column::Array(Box::new(ArrayColumn { - values: Column::Int16((0..100).collect()), - offsets: vec![ - 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, - 95, 100, - ] - .into(), - })), - offsets: vec![0, 4, 8, 11, 15, 20].into(), - })), - ), - ( - "b", - DataType::UInt8, - Column::UInt8(vec![0, 1, 2, 3, 4].into()), - ), - ]); - run_ast(&mut file, "TRY_CAST(a AS UINT8)", &[( - "a", - DataType::UInt16, - Column::UInt16(vec![0, 64, 255, 512, 1024].into()), - )]); - run_ast(&mut file, "TRY_CAST(a AS UINT16)", &[( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, -4].into()), - )]); - run_ast(&mut file, "TRY_CAST(a AS INT64)", &[( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, -4].into()), - )]); - run_ast( - &mut file, - "create_tuple(TRY_CAST(a AS FLOAT32), TRY_CAST(a AS INT32), TRY_CAST(b AS FLOAT32), TRY_CAST(b AS INT32))", - &[ - ( - "a", - DataType::UInt64, - Column::UInt64( - vec![ - 0, - 1, - u8::MAX as u64, - u16::MAX as u64, - u32::MAX as u64, - u64::MAX, - ] - .into(), - ), - ), - ( - "b", - DataType::Float64, - Column::Float64( - vec![ - 0.0, - u32::MAX as f64, - u64::MAX as f64, - f64::MIN, - f64::MAX, - f64::INFINITY, - ] - .into(), - ), - ), - ], - ); - run_ast( - &mut file, - "TRY_CAST(create_array(create_array(a, b), null, null) AS Array(Array(Int8)))", - &[ - ( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 127, 255].into()), - ), - ( - "b", - DataType::Int16, - Column::Int16(vec![0, -1, -127, -128, -129].into()), - ), - ], - ); - run_ast( - &mut file, - "TRY_CAST(create_tuple(a, b, NULL) AS TUPLE(Int8, UInt8, Boolean NULL))", - &[ - ( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 127, 256].into()), - ), - ( - "b", - DataType::Int16, - Column::Int16(vec![0, 1, -127, -128, -129].into()), - ), - ], - ); - - run_ast(&mut file, "CAST(a AS INT16)", &[( - "a", - DataType::Float64, - Column::Float64(vec![0.0f64, 
1.1, 2.2, 3.3, -4.4].into()), - )]); - - run_ast(&mut file, "CAST(b AS INT16)", &[( - "b", - DataType::Int8, - Column::Int8(vec![0, 1, 2, 3, -4].into()), - )]); -} - -#[test] -pub fn test_tyck_fail() { - let mut mint = Mint::new("tests/it/testdata"); - let mut file = mint.new_goldenfile("tyck-fail.txt").unwrap(); - - run_ast(&mut file, "true AND 1", &[]); - run_ast(&mut file, "NOT NOT 'a'", &[]); - run_ast(&mut file, "least(1, 2, 3, a)", &[( - "a", - DataType::Boolean, - Column::Boolean(vec![false; 3].into()), - )]); - run_ast(&mut file, "create_array('a', 1)", &[]); - run_ast(&mut file, "create_array('a', null, 'b', true)", &[]); - run_ast(&mut file, "get(create_array(1, 2), 'a')", &[]); - run_ast(&mut file, "get_tuple(1)(create_tuple(true))", &[]); -} - -#[test] -pub fn test_eval_fail() { - let mut mint = Mint::new("tests/it/testdata"); - let mut file = mint.new_goldenfile("eval-fail.txt").unwrap(); - - run_ast(&mut file, "get(create_array(1, 2), 2)", &[]); - run_ast(&mut file, "get(create_array(a, b), idx)", &[ - ( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), - ), - ( - "b", - DataType::Int16, - Column::Int16(vec![5, 6, 7, 8, 9].into()), - ), - ( - "idx", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), - ), - ]); - run_ast(&mut file, "CAST(a AS UINT16)", &[( - "a", - DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, -4].into()), - )]); - - run_ast(&mut file, "CAST(c AS INT16)", &[( - "c", - DataType::Int64, - Column::Int64(vec![0, 11111111111, 2, 3, -4].into()), - )]); -} - -fn builtin_functions() -> FunctionRegistry { - let mut registry = FunctionRegistry::default(); - - registry.register_2_arg::( - "and", - FunctionProperty::default(), - |lhs, rhs| { - Some(BooleanDomain { - has_false: lhs.has_false || rhs.has_false, - has_true: lhs.has_true && rhs.has_true, - }) - }, - |lhs, rhs| lhs && rhs, - ); - - registry.register_2_arg::, NumberType, NumberType, _, _>( - "plus", - FunctionProperty::default(), - |lhs, rhs| { - Some(IntDomain { - min: lhs.min.checked_add(rhs.min).unwrap_or(i16::MAX as i64), - max: lhs.max.checked_add(rhs.max).unwrap_or(i16::MAX as i64), - }) - }, - |lhs, rhs| lhs + rhs, - ); - - registry.register_2_arg::, NumberType, NumberType, _, _>( - "minus", - FunctionProperty::default(), - |lhs, rhs| { - Some(IntDomain { - min: lhs.min.checked_sub(rhs.max).unwrap_or(i32::MAX as i64), - max: lhs.max.checked_sub(rhs.min).unwrap_or(i32::MAX as i64), - }) - }, - |lhs, rhs| lhs - rhs, - ); - - registry.register_2_arg::, NumberType, NumberType, _, _>( - "multiply", - FunctionProperty::default(), - |_, _| None, - |lhs, rhs| lhs * rhs, - ); - - registry.register_2_arg::, NumberType, NumberType, _, _>( - "divide", - FunctionProperty::default(), - |_, _| None, - |lhs, rhs| lhs / rhs, - ); - - registry.register_2_arg::, NumberType, NumberType, _, _>( - "avg", - FunctionProperty::default(), - |lhs, rhs| { - Some(FloatDomain { - min: (lhs.min + rhs.min) / 2.0, - max: (lhs.max + rhs.max) / 2.0, - }) - }, - |lhs, rhs| (lhs + rhs) / 2.0, - ); - - registry.register_1_arg::( - "not", - FunctionProperty::default(), - |arg| { - Some(BooleanDomain { - has_false: arg.has_true, - has_true: arg.has_false, - }) - }, - |val| !val, - ); - - registry.register_function_factory("least", |_, args_type| { - Some(Arc::new(Function { - signature: FunctionSignature { - name: "least", - args_type: vec![DataType::Int16; args_type.len()], - return_type: DataType::Int16, - property: FunctionProperty::default().commutative(true), - }, - calc_domain: Box::new(|args_domain, 
_| { - let min = args_domain - .iter() - .map(|domain| domain.as_int().unwrap().min) - .min() - .unwrap_or(0); - let max = args_domain - .iter() - .map(|domain| domain.as_int().unwrap().max) - .min() - .unwrap_or(0); - Domain::Int(IntDomain { min, max }) - }), - eval: Box::new(|args, generics| { - if args.is_empty() { - Ok(Value::Scalar(Scalar::Int16(0))) - } else if args.len() == 1 { - Ok(args[0].clone().to_owned()) - } else { - let mut min = - vectorize_2_arg::, NumberType, NumberType>( - |lhs, rhs| lhs.min(rhs), - )( - args[0].try_downcast().unwrap(), - args[1].try_downcast().unwrap(), - generics, - )?; - for arg in &args[2..] { - min = vectorize_2_arg::, NumberType, NumberType>( - |lhs, rhs| lhs.min(rhs), - )( - min.as_ref(), arg.try_downcast().unwrap(), generics - )?; - } - Ok(min.upcast()) - } - }), - })) - }); - - registry.register_0_arg_core::( - "create_array", - FunctionProperty::default(), - || None, - |_| Ok(Value::Scalar(())), - ); - - registry.register_function_factory("create_array", |_, args_type| { - Some(Arc::new(Function { - signature: FunctionSignature { - name: "create_array", - args_type: vec![DataType::Generic(0); args_type.len()], - return_type: DataType::Array(Box::new(DataType::Generic(0))), - property: FunctionProperty::default(), - }, - calc_domain: Box::new(|args_domain, _| { - args_domain.iter().fold(Domain::Array(None), |acc, x| { - acc.merge(&Domain::Array(Some(Box::new(x.clone())))) - }) - }), - eval: Box::new(|args, generics| { - let len = args.iter().find_map(|arg| match arg { - ValueRef::Column(col) => Some(col.len()), - _ => None, - }); - if let Some(len) = len { - let mut array_builder = ColumnBuilder::with_capacity(&generics[0], 0); - for idx in 0..len { - for arg in args { - match arg { - ValueRef::Scalar(scalar) => { - array_builder.push(scalar.clone()); - } - ValueRef::Column(col) => { - array_builder.push(col.index(idx).unwrap()); - } - } - } - } - let offsets = once(0) - .chain((0..len).map(|row| (args.len() * (row + 1)) as u64)) - .collect(); - Ok(Value::Column(Column::Array(Box::new(ArrayColumn { - values: array_builder.build(), - offsets, - })))) - } else { - // All args are scalars, so we return a scalar as result - let mut array = ColumnBuilder::with_capacity(&generics[0], 0); - for arg in args { - match arg { - ValueRef::Scalar(scalar) => { - array.push(scalar.clone()); - } - ValueRef::Column(_) => unreachable!(), - } - } - Ok(Value::Scalar(Scalar::Array(array.build()))) - } - }), - })) - }); - - registry.register_passthrough_nullable_2_arg::>, NumberType, GenericType<0>,_, _>( - "get", - FunctionProperty::default(), - |item_domain, _| Some(item_domain.clone()), - vectorize_with_writer_2_arg::>, NumberType, GenericType<0>>( - |array, idx, output| { - let item = array - .index(idx as usize) - .ok_or_else(|| format!("index out of bounds: the len is {} but the index is {}", array.len(), idx))?; - output.push(item); - Ok(()) - }), - ); - - registry.register_function_factory("create_tuple", |_, args_type| { - Some(Arc::new(Function { - signature: FunctionSignature { - name: "create_tuple", - args_type: args_type.to_vec(), - return_type: DataType::Tuple(args_type.to_vec()), - property: FunctionProperty::default(), - }, - calc_domain: Box::new(|args_domain, _| Domain::Tuple(args_domain.to_vec())), - eval: Box::new(move |args, _generics| { - let len = args.iter().find_map(|arg| match arg { - ValueRef::Column(col) => Some(col.len()), - _ => None, - }); - if let Some(len) = len { - let fields = args - .iter() - .map(|arg| match arg { - 
ValueRef::Scalar(scalar) => scalar.clone().repeat(len).build(), - ValueRef::Column(col) => col.clone(), - }) - .collect(); - Ok(Value::Column(Column::Tuple { fields, len })) - } else { - // All args are scalars, so we return a scalar as result - let fields = args - .iter() - .map(|arg| match arg { - ValueRef::Scalar(scalar) => (*scalar).to_owned(), - ValueRef::Column(_) => unreachable!(), - }) - .collect(); - Ok(Value::Scalar(Scalar::Tuple(fields))) - } - }), - })) - }); - - registry.register_function_factory("get_tuple", |params, args_type| { - let idx = *params.first()?; - let tuple_tys = match args_type.get(0) { - Some(DataType::Tuple(tys)) => tys, - _ => return None, - }; - if idx >= tuple_tys.len() { - return None; - } - - Some(Arc::new(Function { - signature: FunctionSignature { - name: "get_tuple", - args_type: vec![DataType::Tuple(tuple_tys.to_vec())], - return_type: tuple_tys[idx].clone(), - property: FunctionProperty::default(), - }, - calc_domain: Box::new(move |args_domain, _| { - args_domain[0].as_tuple().unwrap()[idx].clone() - }), - eval: Box::new(move |args, _| match &args[0] { - ValueRef::Scalar(ScalarRef::Tuple(fields)) => { - Ok(Value::Scalar(fields[idx].to_owned())) - } - ValueRef::Column(Column::Tuple { fields, .. }) => { - Ok(Value::Column(fields[idx].to_owned())) - } - _ => unreachable!(), - }), - })) - }); - - registry.register_function_factory("get_tuple", |params, args_type| { - let idx = *params.first()?; - let tuple_tys = match args_type.get(0) { - Some(DataType::Nullable(box DataType::Tuple(tys))) => tys, - _ => return None, - }; - if idx >= tuple_tys.len() { - return None; - } - - Some(Arc::new(Function { - signature: FunctionSignature { - name: "get_tuple", - args_type: vec![DataType::Nullable(Box::new(DataType::Tuple( - tuple_tys.to_vec(), - )))], - return_type: DataType::Nullable(Box::new(tuple_tys[idx].clone())), - property: FunctionProperty::default(), - }, - calc_domain: Box::new(move |args_domain, _| { - let NullableDomain { has_null, value } = args_domain[0].as_nullable().unwrap(); - let value = value.as_ref().map(|value| { - let fields = value.as_tuple().unwrap(); - Box::new(fields[idx].clone()) - }); - Domain::Nullable(NullableDomain { - has_null: *has_null, - value, - }) - }), - eval: Box::new(move |args, _| match &args[0] { - ValueRef::Scalar(ScalarRef::Null) => Ok(Value::Scalar(Scalar::Null)), - ValueRef::Scalar(ScalarRef::Tuple(fields)) => { - Ok(Value::Scalar(fields[idx].to_owned())) - } - ValueRef::Column(Column::Nullable(box NullableColumn { - column: Column::Tuple { fields, .. 
}, - validity, - })) => Ok(Value::Column(Column::Nullable(Box::new(NullableColumn { - column: fields[idx].to_owned(), - validity: validity.clone(), - })))), - _ => unreachable!(), - }), - })) - }); - - registry -} - -fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Column)]) { - let result = try { - let raw_expr = parse_raw_expr( - text, - &columns - .iter() - .map(|(name, ty, _)| (*name, ty.clone())) - .collect::>(), - ); - - let fn_registry = builtin_functions(); - let (expr, output_ty) = type_check::check(&raw_expr, &fn_registry)?; - - let remote_expr = RemoteExpr::from_expr(expr); - let expr = remote_expr.into_expr(&fn_registry).unwrap(); - - let input_domains = columns - .iter() - .map(|(_, _, col)| col.domain()) - .collect::>(); - - let domain_calculator = DomainCalculator::new(input_domains.clone()); - let output_domain = domain_calculator.calculate(&expr)?; - - let num_rows = columns.iter().map(|col| col.2.len()).max().unwrap_or(0); - let chunk = Chunk::new( - columns - .iter() - .map(|(_, _, col)| Value::Column(col.clone())) - .collect::>(), - num_rows, - ); - - columns.iter().for_each(|(_, _, col)| { - test_arrow_conversion(col); - }); - - let evaluator = Evaluator { - input_columns: chunk, - context: FunctionContext::default(), - }; - let result = evaluator.run(&expr)?; - - ( - raw_expr, - expr, - input_domains, - output_ty, - output_domain, - result, - ) - }; - - match result { - Ok((raw_expr, expr, input_domains, output_ty, output_domain, result)) => { - writeln!(file, "ast : {text}").unwrap(); - writeln!(file, "raw expr : {raw_expr}").unwrap(); - writeln!(file, "checked expr : {expr}").unwrap(); - - match result { - Value::Scalar(output_scalar) => { - writeln!(file, "output type : {output_ty}").unwrap(); - writeln!(file, "output domain : {output_domain}").unwrap(); - writeln!(file, "output : {}", output_scalar.as_ref()).unwrap(); - } - Value::Column(output_col) => { - test_arrow_conversion(&output_col); - - let mut table = Table::new(); - table.load_preset("||--+-++| ++++++"); - - let mut header = vec!["".to_string()]; - header.extend(columns.iter().map(|(name, _, _)| name.to_string())); - header.push("Output".to_string()); - table.set_header(header); - - let mut type_row = vec!["Type".to_string()]; - type_row.extend(columns.iter().map(|(_, ty, _)| ty.to_string())); - type_row.push(output_ty.to_string()); - table.add_row(type_row); - - let mut domain_row = vec!["Domain".to_string()]; - domain_row.extend(input_domains.iter().map(|domain| domain.to_string())); - domain_row.push(output_domain.to_string()); - table.add_row(domain_row); - - for i in 0..output_col.len() { - let mut row = vec![format!("Row {i}")]; - for (_, _, col) in columns.iter() { - let value = col.index(i).unwrap(); - row.push(format!("{}", value)); - } - row.push(format!("{}", output_col.index(i).unwrap())); - table.add_row(row); - } - - writeln!(file, "evaluation:\n{table}").unwrap(); - - let mut table = Table::new(); - table.load_preset("||--+-++| ++++++"); - - table.set_header(&["Column", "Data"]); - - for (name, _, col) in columns.iter() { - table.add_row(&[name.to_string(), format!("{col:?}")]); - } - - table.add_row(["Output".to_string(), format!("{output_col:?}")]); - - writeln!(file, "evaluation (internal):\n{table}").unwrap(); - } - } - write!(file, "\n\n").unwrap(); - } - Err((Some(span), msg)) => { - writeln!(file, "{}\n", span.display_error((text.to_string(), msg))).unwrap(); - } - Err((None, msg)) => { - writeln!(file, "error: {}\n", msg).unwrap(); - } - } -} - -fn 
test_arrow_conversion(col: &Column) { - let arrow_col = col.as_arrow(); - let new_col = Column::from_arrow(&*arrow_col); - assert_eq!(col, &new_col, "arrow conversion went wrong"); -} From c610561a16b71574d1a383b82deb6b69c4c96d19 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Mon, 8 Aug 2022 21:42:51 +0800 Subject: [PATCH 02/59] feat(query): add concat/filter kernel for chunk --- common/expression/src/kernels/concat.rs | 127 ++ common/expression/src/kernels/filter.rs | 256 ++++ common/expression/src/kernels/mod.rs | 16 + common/expression/tests/it/expression.rs | 1055 +++++++++++++++++ common/expression/tests/it/kernel.rs | 155 +++ .../tests/it/testdata/kernel-pass.txt | 74 ++ 6 files changed, 1683 insertions(+) create mode 100644 common/expression/src/kernels/concat.rs create mode 100644 common/expression/src/kernels/filter.rs create mode 100644 common/expression/src/kernels/mod.rs create mode 100644 common/expression/tests/it/expression.rs create mode 100644 common/expression/tests/it/kernel.rs create mode 100644 common/expression/tests/it/testdata/kernel-pass.txt diff --git a/common/expression/src/kernels/concat.rs b/common/expression/src/kernels/concat.rs new file mode 100644 index 0000000000000..5c617d2df989e --- /dev/null +++ b/common/expression/src/kernels/concat.rs @@ -0,0 +1,127 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
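// A minimal usage sketch of the concat and filter kernels added by this patch, assuming
// the `Chunk`, `Column` and `Value` constructors shown in these hunks and the same
// `Chunk`/`Column`/`Value` imports as the module below; the function name
// `concat_then_filter` is hypothetical and for illustration only, not part of the patch.
fn concat_then_filter() -> common_exception::Result<Chunk> {
    let a = Chunk::new(vec![Value::Column(Column::Int16(vec![1, 2, 3].into()))], 3);
    let b = Chunk::new(vec![Value::Column(Column::Int16(vec![4, 5].into()))], 2);
    // Concat stitches the chunks together column by column into a single 5-row chunk.
    let merged = Chunk::concat(&[a, b])?;
    // Filter keeps only the rows selected by a (nullable) boolean predicate:
    // here rows 0, 2 and 3, i.e. the values 1, 3 and 4.
    let keep = Value::Column(Column::Boolean(vec![true, false, true, true, false].into()));
    merged.filter(&keep)
}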
+ +use common_arrow::arrow::buffer::Buffer; +use common_exception::ErrorCode; +use common_exception::Result; + +use crate::types::nullable::NullableColumn; +use crate::types::AnyType; +use crate::types::ArrayType; +use crate::types::BooleanType; +use crate::types::EmptyArrayType; +use crate::types::NullType; +use crate::types::NullableType; +use crate::types::NumberType; +use crate::types::StringType; +use crate::types::ValueType; +use crate::with_number_mapped_type; +use crate::Chunk; +use crate::Column; +use crate::Value; + +impl Chunk { + pub fn concat(chunks: &[Chunk]) -> Result { + if chunks.is_empty() { + return Err(ErrorCode::EmptyData("Can't concat empty chunks")); + } + + if chunks.len() == 1 { + return Ok(chunks[0].clone()); + } + + let num_rows = chunks.iter().map(|c| c.num_rows()).sum(); + let mut concat_columns = Vec::with_capacity(chunks[0].num_columns()); + for i in 0..chunks[0].num_columns() { + let mut columns = Vec::with_capacity(chunks.len()); + for chunk in chunks.iter() { + let c = &chunk.columns()[i]; + match c { + Value::Scalar(s) => { + let builder = s.as_ref().repeat(chunk.num_rows()); + let col = builder.build(); + columns.push(col); + } + Value::Column(c) => columns.push(c.clone()), + } + } + let c = Column::concat(&columns); + concat_columns.push(Value::Column(c)); + } + Ok(Chunk::new(concat_columns, num_rows)) + } +} + +impl Column { + pub fn concat(columns: &[Column]) -> Column { + if columns.len() == 1 { + return columns[0].clone(); + } + + with_number_mapped_type!(SRC_TYPE, match &columns[0] { + Column::SRC_TYPE(_) => { + let mut values = Vec::with_capacity(columns.len()); + for c in columns.iter() { + let value = NumberType::::try_downcast_column(c).unwrap(); + values.push(value.clone()); + } + NumberType::::upcast_column(Self::concat_primitive_types(&values)) + } + Column::Null { .. } => Self::concat_scalar_types::(columns), + Column::EmptyArray { .. } => Self::concat_scalar_types::(columns), + Column::Boolean(_) => Self::concat_scalar_types::(columns), + Column::String(_) => Self::concat_scalar_types::(columns), + Column::Array(_) => Self::concat_scalar_types::>(columns), + Column::Nullable(_) => { + let mut bitmaps = Vec::with_capacity(columns.len()); + let mut inners = Vec::with_capacity(columns.len()); + for c in columns { + let nullable_column = NullableType::::try_downcast_column(&c).unwrap(); + inners.push(nullable_column.column); + bitmaps.push(Column::Boolean(nullable_column.validity)); + } + let column = Self::concat(&inners); + let validity = Self::concat_scalar_types::(&bitmaps); + let validity = BooleanType::try_downcast_column(&validity).unwrap(); + Column::Nullable(Box::new(NullableColumn { column, validity })) + } + Column::Tuple { .. 
} => Self::concat_scalar_types::(columns), + }) + } + + fn concat_primitive_types(values: &[Buffer]) -> Buffer { + let capacity = values.iter().map(|c| c.len()).sum(); + let mut results = Vec::with_capacity(capacity); + for value in values { + results.extend_from_slice(value.as_slice()); + } + results.into() + } + + fn concat_scalar_types(columns: &[Column]) -> Column { + let capacity = columns.iter().map(|c| c.len()).sum(); + let columns: Vec = columns + .iter() + .map(|c| T::try_downcast_column(c).unwrap()) + .collect(); + + let mut builder = T::column_init_builder(&columns[0], capacity); + for col in columns { + for item in T::iter_column(&col) { + T::push_item(&mut builder, item) + } + } + T::upcast_column(T::build_column(builder)) + } +} diff --git a/common/expression/src/kernels/filter.rs b/common/expression/src/kernels/filter.rs new file mode 100644 index 0000000000000..b987e1e243f62 --- /dev/null +++ b/common/expression/src/kernels/filter.rs @@ -0,0 +1,256 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apach&e License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_arrow::arrow::bitmap::utils::BitChunkIterExact; +use common_arrow::arrow::bitmap::utils::BitChunksExact; +use common_arrow::arrow::bitmap::Bitmap; +use common_arrow::arrow::buffer::Buffer; +use common_exception::ErrorCode; +use common_exception::Result; + +use crate::types::nullable::NullableColumn; +use crate::types::AnyType; +use crate::types::ArrayType; +use crate::types::BooleanType; +use crate::types::NullableType; +use crate::types::StringType; +use crate::types::ValueType; +use crate::with_number_type; +use crate::Chunk; +use crate::Column; +use crate::Value; + +impl Chunk { + pub fn filter(self, predicate: &Value) -> Result { + if self.num_columns() == 0 || self.num_rows() == 0 { + return Ok(self); + } + + let predicate = Self::cast_to_nonull_boolean(predicate).ok_or_else(|| { + ErrorCode::BadDataValueType(format!( + "Filter predict column does not support type '{:?}'", + predicate + )) + })?; + + match predicate { + Value::Scalar(s) => { + if s { + Ok(self) + } else { + Ok(Chunk::empty()) + } + } + Value::Column(bitmap) => { + let count_zeros = bitmap.unset_bits(); + match count_zeros { + 0 => Ok(self), + _ => { + if count_zeros == self.num_rows() { + return Ok(Chunk::empty()); + } + let mut after_columns = Vec::with_capacity(self.num_columns()); + for value in self.columns() { + match value { + Value::Scalar(v) => after_columns.push(Value::Scalar(v.clone())), + Value::Column(c) => { + after_columns.push(Value::Column(Column::filter(c, &bitmap))) + } + } + } + Ok(Chunk::new(after_columns, self.num_rows() - count_zeros)) + } + } + } + } + } + + // Must be nullable boolean or boolean value + fn cast_to_nonull_boolean(predicate: &Value) -> Option> { + match predicate { + Value::Scalar(v) => { + if let Some(v) = NullableType::::try_downcast_scalar(&v.as_ref()) { + Some(Value::Scalar(v.unwrap_or_default())) + } else { + BooleanType::try_downcast_scalar(&v.as_ref()).map(|c| Value::Scalar(c)) + } + } + Value::Column(c) => 
{ + if let Some(nb) = NullableType::::try_downcast_column(&c) { + let validity = common_arrow::arrow::bitmap::and(&nb.validity, &nb.column); + Some(Value::Column(validity)) + } else { + BooleanType::try_downcast_column(&c).map(|c| Value::Column(c)) + } + } + } + } +} + +impl Column { + pub fn filter(&self, filter: &Bitmap) -> Column { + with_number_type!(SRC_TYPE, match self { + Column::SRC_TYPE(values) => { + Column::SRC_TYPE(Self::filter_primitive_types(values, filter)) + } + Column::Null { .. } | Column::EmptyArray { .. } => + self.slice(0..filter.len() - filter.unset_bits()), + Column::Boolean(bm) => Self::filter_scalar_types::(bm, filter), + Column::String(column) => Self::filter_scalar_types::(column, filter), + Column::Array(column) => + Self::filter_scalar_types::>(column, filter), + Column::Nullable(c) => { + let column = Self::filter(&c.column, filter); + let validity = Self::filter_scalar_types::(&c.validity, filter); + Column::Nullable(Box::new(NullableColumn { + column, + validity: BooleanType::try_downcast_column(&validity).unwrap(), + })) + } + Column::Tuple { fields, .. } => { + let len = filter.len() - filter.unset_bits(); + let fields = fields.iter().map(|c| c.filter(filter)).collect(); + Column::Tuple { fields, len } + } + }) + } + + fn filter_scalar_types(col: &T::Column, filter: &Bitmap) -> Column { + let length = filter.len() - filter.unset_bits(); + if length == T::column_len(col) { + return T::upcast_column(col.clone()); + } + const CHUNK_SIZE: usize = 64; + let mut builder = T::column_init_builder(col, length); + let (mut slice, offset, mut length) = filter.as_slice(); + let mut start_index: usize = 0; + + if offset > 0 { + let n = 8 - offset; + start_index += n; + filter + .iter() + .enumerate() + .take(n) + .for_each(|(index, is_selected)| { + if is_selected { + T::push_item(&mut builder, T::index_column(col, index).unwrap()); + } + }); + slice = &slice[1..]; + length -= n; + } + + let mut mask_chunks = BitChunksExact::::new(slice, length); + + mask_chunks + .by_ref() + .enumerate() + .for_each(|(mask_index, mut mask)| { + while mask != 0 { + let n = mask.trailing_zeros() as usize; + let index = mask_index * CHUNK_SIZE + n + start_index; + T::push_item(&mut builder, T::index_column(col, index).unwrap()); + mask = mask & (mask - 1); + } + }); + + let remainder_start = length - length % CHUNK_SIZE; + mask_chunks + .remainder_iter() + .enumerate() + .for_each(|(mask_index, is_selected)| { + if is_selected { + let index = mask_index + remainder_start + start_index; + T::push_item(&mut builder, T::index_column(col, index).unwrap()); + } + }); + + T::upcast_column(T::build_column(builder)) + } + + // low-level API using unsafe to improve performance + fn filter_primitive_types(values: &Buffer, filter: &Bitmap) -> Buffer { + assert_eq!(values.len(), filter.len()); + let selected = filter.len() - filter.unset_bits(); + if selected == values.len() { + return values.clone(); + } + let mut values = values.as_slice(); + let mut new = Vec::::with_capacity(selected); + let mut dst = new.as_mut_ptr(); + + let (mut slice, offset, mut length) = filter.as_slice(); + if offset > 0 { + // Consume the offset + let n = 8 - offset; + values + .iter() + .zip(filter.iter()) + .take(n) + .for_each(|(value, is_selected)| { + if is_selected { + unsafe { + dst.write(*value); + dst = dst.add(1); + } + } + }); + slice = &slice[1..]; + length -= n; + values = &values[n..]; + } + + const CHUNK_SIZE: usize = 64; + let mut chunks = values.chunks_exact(CHUNK_SIZE); + let mut mask_chunks = 
BitChunksExact::::new(slice, length); + + chunks + .by_ref() + .zip(mask_chunks.by_ref()) + .for_each(|(chunk, mut mask)| { + if mask == u64::MAX { + unsafe { + std::ptr::copy(chunk.as_ptr(), dst, CHUNK_SIZE); + dst = dst.add(CHUNK_SIZE); + } + } else { + while mask != 0 { + let n = mask.trailing_zeros() as usize; + unsafe { + dst.write(chunk[n]); + dst = dst.add(1); + } + mask = mask & (mask - 1); + } + } + }); + + chunks + .remainder() + .iter() + .zip(mask_chunks.remainder_iter()) + .for_each(|(value, is_selected)| { + if is_selected { + unsafe { + dst.write(*value); + dst = dst.add(1); + } + } + }); + + unsafe { new.set_len(selected) }; + new.into() + } +} diff --git a/common/expression/src/kernels/mod.rs b/common/expression/src/kernels/mod.rs new file mode 100644 index 0000000000000..a02c5fe571148 --- /dev/null +++ b/common/expression/src/kernels/mod.rs @@ -0,0 +1,16 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod concat; +mod filter; diff --git a/common/expression/tests/it/expression.rs b/common/expression/tests/it/expression.rs new file mode 100644 index 0000000000000..b6796def52462 --- /dev/null +++ b/common/expression/tests/it/expression.rs @@ -0,0 +1,1055 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
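// A standalone sketch of the bit-chunk trick used by `filter_primitive_types` and
// `filter_scalar_types` above: `mask & (mask - 1)` clears the lowest set bit, so the
// loop visits exactly the selected row positions inside each 64-row chunk. The function
// below is illustrative only and not part of the kernels module.
fn selected_indices(mut mask: u64) -> Vec<usize> {
    let mut out = Vec::with_capacity(mask.count_ones() as usize);
    while mask != 0 {
        out.push(mask.trailing_zeros() as usize); // index of the lowest selected row
        mask &= mask - 1; // clear that bit and continue with the remaining ones
    }
    out
}
// e.g. selected_indices(0b1011) yields [0, 1, 3].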
+ +use std::io::Write; +use std::iter::once; +use std::sync::Arc; + +use comfy_table::Table; +use common_ast::DisplayError; +use common_expression::type_check; +use common_expression::types::array::ArrayColumn; +use common_expression::types::nullable::NullableColumn; +use common_expression::types::string::StringColumn; +use common_expression::types::ArrayType; +use common_expression::types::DataType; +use common_expression::types::*; +use common_expression::vectorize_2_arg; +use common_expression::vectorize_with_writer_2_arg; +use common_expression::BooleanDomain; +use common_expression::Chunk; +use common_expression::Column; +use common_expression::ColumnBuilder; +use common_expression::Domain; +use common_expression::DomainCalculator; +use common_expression::Evaluator; +use common_expression::FloatDomain; +use common_expression::Function; +use common_expression::FunctionContext; +use common_expression::FunctionProperty; +use common_expression::FunctionRegistry; +use common_expression::FunctionSignature; +use common_expression::IntDomain; +use common_expression::NullableDomain; +use common_expression::RemoteExpr; +use common_expression::Scalar; +use common_expression::ScalarRef; +use common_expression::Value; +use common_expression::ValueRef; +use goldenfile::Mint; + +use crate::parser::parse_raw_expr; + +#[test] +pub fn test_pass() { + let mut mint = Mint::new("tests/it/testdata"); + let mut file = mint.new_goldenfile("run-pass.txt").unwrap(); + + run_ast(&mut file, "true AND false", &[]); + run_ast(&mut file, "null AND false", &[]); + run_ast(&mut file, "plus(a, 10)", &[( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + )]); + run_ast(&mut file, "plus(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![1, 2, 3].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + run_ast(&mut file, "plus(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ("b", DataType::Null, Column::Null { len: 3 }), + ]); + + run_ast(&mut file, "minus(a, 10)", &[( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + )]); + + run_ast(&mut file, "minus(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt16)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt16(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::Int16)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Int16(vec![1, 2, 3].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + + run_ast(&mut file, "minus(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + 
), + ("b", DataType::Null, Column::Null { len: 3 }), + ]); + + run_ast(&mut file, "multiply(a, 10)", &[( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + )]); + + run_ast(&mut file, "multiply(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt16)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt16(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::Int16)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Int16(vec![1, 2, 3].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + + run_ast(&mut file, "multiply(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt32)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt32(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::Int32)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Int32(vec![1, 2, 3].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + + run_ast(&mut file, "multiply(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ("b", DataType::Null, Column::Null { len: 3 }), + ]); + + run_ast(&mut file, "divide(a, 10)", &[( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + )]); + + run_ast(&mut file, "divide(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt16)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt16(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::Int16)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Int16(vec![1, 2, 3].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + + run_ast(&mut file, "divide(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ("b", DataType::Null, Column::Null { len: 3 }), + ]); + + run_ast(&mut file, "avg(a, 10)", &[( + "a", + DataType::Nullable(Box::new(DataType::UInt8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + )]); + + run_ast(&mut file, "avg(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt16)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt16(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::Int16)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Int16(vec![1, 2, 3].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + + run_ast(&mut file, "avg(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::UInt32)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt32(vec![10, 11, 12].into()), + validity: vec![false, true, 
false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::Int32)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Int32(vec![1, 2, 3].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + + run_ast(&mut file, "avg(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::Float32)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Float32(vec![10f32, 11f32, 12f32].into()), + validity: vec![false, true, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::Int32)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Int32(vec![1, 2, 3].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + + run_ast(&mut file, "avg(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::Float32)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Float32(vec![10f32, 11f32, 12f32].into()), + validity: vec![false, true, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::Float64)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Float64(vec![1f64, 2f64, 3f64].into()), + validity: vec![false, true, true].into(), + })), + ), + ]); + + run_ast(&mut file, "multiply(a, b)", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::Int8)), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12].into()), + validity: vec![false, true, false].into(), + })), + ), + ("b", DataType::Null, Column::Null { len: 3 }), + ]); + + run_ast(&mut file, "NOT a", &[( + "a", + DataType::Nullable(Box::new(DataType::Boolean)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Boolean(vec![true, false, true].into()), + validity: vec![false, true, false].into(), + })), + )]); + + run_ast(&mut file, "NOT a", &[("a", DataType::Null, Column::Null { + len: 5, + })]); + run_ast(&mut file, "least(10, CAST(20 as Int8), 30, 40)", &[]); + run_ast(&mut file, "create_tuple(null, true)", &[]); + run_ast(&mut file, "get_tuple(1)(create_tuple(a, b))", &[ + ( + "a", + DataType::Int16, + Column::Int16(vec![0, 1, 2, 3, 4].into()), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::String)), + Column::Nullable(Box::new(NullableColumn { + column: Column::String(StringColumn { + data: "abcde".as_bytes().to_vec().into(), + offsets: vec![0, 1, 2, 3, 4, 5].into(), + }), + validity: vec![true, true, false, false, false].into(), + })), + ), + ]); + run_ast(&mut file, "get_tuple(1)(create_tuple(a, b))", &[ + ( + "a", + DataType::Nullable(Box::new(DataType::Boolean)), + Column::Nullable(Box::new(NullableColumn { + column: Column::Boolean(vec![false; 5].into()), + validity: vec![true, true, false, false, false].into(), + })), + ), + ( + "b", + DataType::Nullable(Box::new(DataType::String)), + Column::Nullable(Box::new(NullableColumn { + column: Column::String(StringColumn { + data: "abcde".as_bytes().to_vec().into(), + offsets: vec![0, 1, 2, 3, 4, 5].into(), + }), + validity: vec![true, true, false, false, false].into(), + })), + ), + ]); + run_ast(&mut file, "create_array()", &[]); + run_ast(&mut file, "create_array(null, true)", &[]); + run_ast(&mut file, "create_array(a, b)", &[ + ( + "a", + DataType::Int16, + Column::Int16(vec![0, 1, 2, 3, 4].into()), + ), + ( + "b", + DataType::Int16, + Column::Int16(vec![5, 6, 7, 8, 9].into()), + ), + ]); + run_ast( + &mut file, + "create_array(create_array(a, b), null, null)", + &[ + ( + "a", + DataType::Int16, + Column::Int16(vec![0, 1, 2, 3, 4].into()), + ), + ( + "b", + DataType::Int16, + 
Column::Int16(vec![5, 6, 7, 8, 9].into()), + ), + ], + ); + run_ast(&mut file, "get(a, b)", &[ + ( + "a", + DataType::Array(Box::new(DataType::Int16)), + Column::Array(Box::new(ArrayColumn { + values: Column::Int16((0..100).collect()), + offsets: vec![0, 20, 40, 60, 80, 100].into(), + })), + ), + ( + "b", + DataType::UInt8, + Column::UInt8(vec![0, 1, 2, 3, 4].into()), + ), + ]); + run_ast(&mut file, "get(a, b)", &[ + ( + "a", + DataType::Array(Box::new(DataType::Array(Box::new(DataType::Int16)))), + Column::Array(Box::new(ArrayColumn { + values: Column::Array(Box::new(ArrayColumn { + values: Column::Int16((0..100).collect()), + offsets: vec![ + 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, + 95, 100, + ] + .into(), + })), + offsets: vec![0, 4, 8, 11, 15, 20].into(), + })), + ), + ( + "b", + DataType::UInt8, + Column::UInt8(vec![0, 1, 2, 3, 4].into()), + ), + ]); + run_ast(&mut file, "TRY_CAST(a AS UINT8)", &[( + "a", + DataType::UInt16, + Column::UInt16(vec![0, 64, 255, 512, 1024].into()), + )]); + run_ast(&mut file, "TRY_CAST(a AS UINT16)", &[( + "a", + DataType::Int16, + Column::Int16(vec![0, 1, 2, 3, -4].into()), + )]); + run_ast(&mut file, "TRY_CAST(a AS INT64)", &[( + "a", + DataType::Int16, + Column::Int16(vec![0, 1, 2, 3, -4].into()), + )]); + run_ast( + &mut file, + "create_tuple(TRY_CAST(a AS FLOAT32), TRY_CAST(a AS INT32), TRY_CAST(b AS FLOAT32), TRY_CAST(b AS INT32))", + &[ + ( + "a", + DataType::UInt64, + Column::UInt64( + vec![ + 0, + 1, + u8::MAX as u64, + u16::MAX as u64, + u32::MAX as u64, + u64::MAX, + ] + .into(), + ), + ), + ( + "b", + DataType::Float64, + Column::Float64( + vec![ + 0.0, + u32::MAX as f64, + u64::MAX as f64, + f64::MIN, + f64::MAX, + f64::INFINITY, + ] + .into(), + ), + ), + ], + ); + run_ast( + &mut file, + "TRY_CAST(create_array(create_array(a, b), null, null) AS Array(Array(Int8)))", + &[ + ( + "a", + DataType::Int16, + Column::Int16(vec![0, 1, 2, 127, 255].into()), + ), + ( + "b", + DataType::Int16, + Column::Int16(vec![0, -1, -127, -128, -129].into()), + ), + ], + ); + run_ast( + &mut file, + "TRY_CAST(create_tuple(a, b, NULL) AS TUPLE(Int8, UInt8, Boolean NULL))", + &[ + ( + "a", + DataType::Int16, + Column::Int16(vec![0, 1, 2, 127, 256].into()), + ), + ( + "b", + DataType::Int16, + Column::Int16(vec![0, 1, -127, -128, -129].into()), + ), + ], + ); + + run_ast(&mut file, "CAST(a AS INT16)", &[( + "a", + DataType::Float64, + Column::Float64(vec![0.0f64, 1.1, 2.2, 3.3, -4.4].into()), + )]); + + run_ast(&mut file, "CAST(b AS INT16)", &[( + "b", + DataType::Int8, + Column::Int8(vec![0, 1, 2, 3, -4].into()), + )]); +} + +#[test] +pub fn test_tyck_fail() { + let mut mint = Mint::new("tests/it/testdata"); + let mut file = mint.new_goldenfile("tyck-fail.txt").unwrap(); + + run_ast(&mut file, "true AND 1", &[]); + run_ast(&mut file, "NOT NOT 'a'", &[]); + run_ast(&mut file, "least(1, 2, 3, a)", &[( + "a", + DataType::Boolean, + Column::Boolean(vec![false; 3].into()), + )]); + run_ast(&mut file, "create_array('a', 1)", &[]); + run_ast(&mut file, "create_array('a', null, 'b', true)", &[]); + run_ast(&mut file, "get(create_array(1, 2), 'a')", &[]); + run_ast(&mut file, "get_tuple(1)(create_tuple(true))", &[]); +} + +#[test] +pub fn test_eval_fail() { + let mut mint = Mint::new("tests/it/testdata"); + let mut file = mint.new_goldenfile("eval-fail.txt").unwrap(); + + run_ast(&mut file, "get(create_array(1, 2), 2)", &[]); + run_ast(&mut file, "get(create_array(a, b), idx)", &[ + ( + "a", + DataType::Int16, + Column::Int16(vec![0, 1, 2, 
3, 4].into()),
+        ),
+        (
+            "b",
+            DataType::Int16,
+            Column::Int16(vec![5, 6, 7, 8, 9].into()),
+        ),
+        (
+            "idx",
+            DataType::Int16,
+            Column::Int16(vec![0, 1, 2, 3, 4].into()),
+        ),
+    ]);
+    run_ast(&mut file, "CAST(a AS UINT16)", &[(
+        "a",
+        DataType::Int16,
+        Column::Int16(vec![0, 1, 2, 3, -4].into()),
+    )]);
+
+    run_ast(&mut file, "CAST(c AS INT16)", &[(
+        "c",
+        DataType::Int64,
+        Column::Int64(vec![0, 11111111111, 2, 3, -4].into()),
+    )]);
+}
+
+fn builtin_functions() -> FunctionRegistry {
+    let mut registry = FunctionRegistry::default();
+
+    registry.register_2_arg::<BooleanType, BooleanType, BooleanType, _, _>(
+        "and",
+        FunctionProperty::default(),
+        |lhs, rhs| {
+            Some(BooleanDomain {
+                has_false: lhs.has_false || rhs.has_false,
+                has_true: lhs.has_true && rhs.has_true,
+            })
+        },
+        |lhs, rhs| lhs && rhs,
+    );
+
+    registry.register_2_arg::<NumberType<i16>, NumberType<i16>, NumberType<i16>, _, _>(
+        "plus",
+        FunctionProperty::default(),
+        |lhs, rhs| {
+            Some(IntDomain {
+                min: lhs.min.checked_add(rhs.min).unwrap_or(i16::MAX as i64),
+                max: lhs.max.checked_add(rhs.max).unwrap_or(i16::MAX as i64),
+            })
+        },
+        |lhs, rhs| lhs + rhs,
+    );
+
+    registry.register_2_arg::<NumberType<i32>, NumberType<i32>, NumberType<i32>, _, _>(
+        "minus",
+        FunctionProperty::default(),
+        |lhs, rhs| {
+            Some(IntDomain {
+                min: lhs.min.checked_sub(rhs.max).unwrap_or(i32::MAX as i64),
+                max: lhs.max.checked_sub(rhs.min).unwrap_or(i32::MAX as i64),
+            })
+        },
+        |lhs, rhs| lhs - rhs,
+    );
+
+    registry.register_2_arg::<NumberType<i64>, NumberType<i64>, NumberType<i64>, _, _>(
+        "multiply",
+        FunctionProperty::default(),
+        |_, _| None,
+        |lhs, rhs| lhs * rhs,
+    );
+
+    registry.register_2_arg::<NumberType<f32>, NumberType<f32>, NumberType<f32>, _, _>(
+        "divide",
+        FunctionProperty::default(),
+        |_, _| None,
+        |lhs, rhs| lhs / rhs,
+    );
+
+    registry.register_2_arg::<NumberType<f64>, NumberType<f64>, NumberType<f64>, _, _>(
+        "avg",
+        FunctionProperty::default(),
+        |lhs, rhs| {
+            Some(FloatDomain {
+                min: (lhs.min + rhs.min) / 2.0,
+                max: (lhs.max + rhs.max) / 2.0,
+            })
+        },
+        |lhs, rhs| (lhs + rhs) / 2.0,
+    );
+
+    registry.register_1_arg::<BooleanType, BooleanType, _, _>(
+        "not",
+        FunctionProperty::default(),
+        |arg| {
+            Some(BooleanDomain {
+                has_false: arg.has_true,
+                has_true: arg.has_false,
+            })
+        },
+        |val| !val,
+    );
+
+    registry.register_function_factory("least", |_, args_type| {
+        Some(Arc::new(Function {
+            signature: FunctionSignature {
+                name: "least",
+                args_type: vec![DataType::Int16; args_type.len()],
+                return_type: DataType::Int16,
+                property: FunctionProperty::default().commutative(true),
+            },
+            calc_domain: Box::new(|args_domain, _| {
+                let min = args_domain
+                    .iter()
+                    .map(|domain| domain.as_int().unwrap().min)
+                    .min()
+                    .unwrap_or(0);
+                let max = args_domain
+                    .iter()
+                    .map(|domain| domain.as_int().unwrap().max)
+                    .min()
+                    .unwrap_or(0);
+                Domain::Int(IntDomain { min, max })
+            }),
+            eval: Box::new(|args, generics| {
+                if args.is_empty() {
+                    Ok(Value::Scalar(Scalar::Int16(0)))
+                } else if args.len() == 1 {
+                    Ok(args[0].clone().to_owned())
+                } else {
+                    let mut min =
+                        vectorize_2_arg::<NumberType<i16>, NumberType<i16>, NumberType<i16>>(
+                            |lhs, rhs| lhs.min(rhs),
+                        )(
+                        args[0].try_downcast().unwrap(),
+                        args[1].try_downcast().unwrap(),
+                        generics,
+                    )?;
+                    for arg in &args[2..]
{ + min = vectorize_2_arg::, NumberType, NumberType>( + |lhs, rhs| lhs.min(rhs), + )( + min.as_ref(), arg.try_downcast().unwrap(), generics + )?; + } + Ok(min.upcast()) + } + }), + })) + }); + + registry.register_0_arg_core::( + "create_array", + FunctionProperty::default(), + || None, + |_| Ok(Value::Scalar(())), + ); + + registry.register_function_factory("create_array", |_, args_type| { + Some(Arc::new(Function { + signature: FunctionSignature { + name: "create_array", + args_type: vec![DataType::Generic(0); args_type.len()], + return_type: DataType::Array(Box::new(DataType::Generic(0))), + property: FunctionProperty::default(), + }, + calc_domain: Box::new(|args_domain, _| { + args_domain.iter().fold(Domain::Array(None), |acc, x| { + acc.merge(&Domain::Array(Some(Box::new(x.clone())))) + }) + }), + eval: Box::new(|args, generics| { + let len = args.iter().find_map(|arg| match arg { + ValueRef::Column(col) => Some(col.len()), + _ => None, + }); + if let Some(len) = len { + let mut array_builder = ColumnBuilder::with_capacity(&generics[0], 0); + for idx in 0..len { + for arg in args { + match arg { + ValueRef::Scalar(scalar) => { + array_builder.push(scalar.clone()); + } + ValueRef::Column(col) => { + array_builder.push(col.index(idx).unwrap()); + } + } + } + } + let offsets = once(0) + .chain((0..len).map(|row| (args.len() * (row + 1)) as u64)) + .collect(); + Ok(Value::Column(Column::Array(Box::new(ArrayColumn { + values: array_builder.build(), + offsets, + })))) + } else { + // All args are scalars, so we return a scalar as result + let mut array = ColumnBuilder::with_capacity(&generics[0], 0); + for arg in args { + match arg { + ValueRef::Scalar(scalar) => { + array.push(scalar.clone()); + } + ValueRef::Column(_) => unreachable!(), + } + } + Ok(Value::Scalar(Scalar::Array(array.build()))) + } + }), + })) + }); + + registry.register_passthrough_nullable_2_arg::>, NumberType, GenericType<0>,_, _>( + "get", + FunctionProperty::default(), + |item_domain, _| Some(item_domain.clone()), + vectorize_with_writer_2_arg::>, NumberType, GenericType<0>>( + |array, idx, output| { + let item = array + .index(idx as usize) + .ok_or_else(|| format!("index out of bounds: the len is {} but the index is {}", array.len(), idx))?; + output.push(item); + Ok(()) + }), + ); + + registry.register_function_factory("create_tuple", |_, args_type| { + Some(Arc::new(Function { + signature: FunctionSignature { + name: "create_tuple", + args_type: args_type.to_vec(), + return_type: DataType::Tuple(args_type.to_vec()), + property: FunctionProperty::default(), + }, + calc_domain: Box::new(|args_domain, _| Domain::Tuple(args_domain.to_vec())), + eval: Box::new(move |args, _generics| { + let len = args.iter().find_map(|arg| match arg { + ValueRef::Column(col) => Some(col.len()), + _ => None, + }); + if let Some(len) = len { + let fields = args + .iter() + .map(|arg| match arg { + ValueRef::Scalar(scalar) => scalar.clone().repeat(len).build(), + ValueRef::Column(col) => col.clone(), + }) + .collect(); + Ok(Value::Column(Column::Tuple { fields, len })) + } else { + // All args are scalars, so we return a scalar as result + let fields = args + .iter() + .map(|arg| match arg { + ValueRef::Scalar(scalar) => (*scalar).to_owned(), + ValueRef::Column(_) => unreachable!(), + }) + .collect(); + Ok(Value::Scalar(Scalar::Tuple(fields))) + } + }), + })) + }); + + registry.register_function_factory("get_tuple", |params, args_type| { + let idx = *params.first()?; + let tuple_tys = match args_type.get(0) { + Some(DataType::Tuple(tys)) 
=> tys, + _ => return None, + }; + if idx >= tuple_tys.len() { + return None; + } + + Some(Arc::new(Function { + signature: FunctionSignature { + name: "get_tuple", + args_type: vec![DataType::Tuple(tuple_tys.to_vec())], + return_type: tuple_tys[idx].clone(), + property: FunctionProperty::default(), + }, + calc_domain: Box::new(move |args_domain, _| { + args_domain[0].as_tuple().unwrap()[idx].clone() + }), + eval: Box::new(move |args, _| match &args[0] { + ValueRef::Scalar(ScalarRef::Tuple(fields)) => { + Ok(Value::Scalar(fields[idx].to_owned())) + } + ValueRef::Column(Column::Tuple { fields, .. }) => { + Ok(Value::Column(fields[idx].to_owned())) + } + _ => unreachable!(), + }), + })) + }); + + registry.register_function_factory("get_tuple", |params, args_type| { + let idx = *params.first()?; + let tuple_tys = match args_type.get(0) { + Some(DataType::Nullable(box DataType::Tuple(tys))) => tys, + _ => return None, + }; + if idx >= tuple_tys.len() { + return None; + } + + Some(Arc::new(Function { + signature: FunctionSignature { + name: "get_tuple", + args_type: vec![DataType::Nullable(Box::new(DataType::Tuple( + tuple_tys.to_vec(), + )))], + return_type: DataType::Nullable(Box::new(tuple_tys[idx].clone())), + property: FunctionProperty::default(), + }, + calc_domain: Box::new(move |args_domain, _| { + let NullableDomain { has_null, value } = args_domain[0].as_nullable().unwrap(); + let value = value.as_ref().map(|value| { + let fields = value.as_tuple().unwrap(); + Box::new(fields[idx].clone()) + }); + Domain::Nullable(NullableDomain { + has_null: *has_null, + value, + }) + }), + eval: Box::new(move |args, _| match &args[0] { + ValueRef::Scalar(ScalarRef::Null) => Ok(Value::Scalar(Scalar::Null)), + ValueRef::Scalar(ScalarRef::Tuple(fields)) => { + Ok(Value::Scalar(fields[idx].to_owned())) + } + ValueRef::Column(Column::Nullable(box NullableColumn { + column: Column::Tuple { fields, .. 
}, + validity, + })) => Ok(Value::Column(Column::Nullable(Box::new(NullableColumn { + column: fields[idx].to_owned(), + validity: validity.clone(), + })))), + _ => unreachable!(), + }), + })) + }); + + registry +} + +fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Column)]) { + let result = try { + let raw_expr = parse_raw_expr( + text, + &columns + .iter() + .map(|(name, ty, _)| (*name, ty.clone())) + .collect::>(), + ); + + let fn_registry = builtin_functions(); + let (expr, output_ty) = type_check::check(&raw_expr, &fn_registry)?; + + let remote_expr = RemoteExpr::from_expr(expr); + let expr = remote_expr.into_expr(&fn_registry).unwrap(); + + let input_domains = columns + .iter() + .map(|(_, _, col)| col.domain()) + .collect::>(); + + let domain_calculator = DomainCalculator::new(input_domains.clone()); + let output_domain = domain_calculator.calculate(&expr)?; + + let num_rows = columns.iter().map(|col| col.2.len()).max().unwrap_or(0); + let chunk = Chunk::new( + columns + .iter() + .map(|(_, _, col)| Value::Column(col.clone())) + .collect::>(), + num_rows, + ); + + columns.iter().for_each(|(_, _, col)| { + test_arrow_conversion(col); + }); + + let evaluator = Evaluator { + input_columns: chunk, + context: FunctionContext::default(), + }; + let result = evaluator.run(&expr)?; + + ( + raw_expr, + expr, + input_domains, + output_ty, + output_domain, + result, + ) + }; + + match result { + Ok((raw_expr, expr, input_domains, output_ty, output_domain, result)) => { + writeln!(file, "ast : {text}").unwrap(); + writeln!(file, "raw expr : {raw_expr}").unwrap(); + writeln!(file, "checked expr : {expr}").unwrap(); + + match result { + Value::Scalar(output_scalar) => { + writeln!(file, "output type : {output_ty}").unwrap(); + writeln!(file, "output domain : {output_domain}").unwrap(); + writeln!(file, "output : {}", output_scalar.as_ref()).unwrap(); + } + Value::Column(output_col) => { + test_arrow_conversion(&output_col); + + let mut table = Table::new(); + table.load_preset("||--+-++| ++++++"); + + let mut header = vec!["".to_string()]; + header.extend(columns.iter().map(|(name, _, _)| name.to_string())); + header.push("Output".to_string()); + table.set_header(header); + + let mut type_row = vec!["Type".to_string()]; + type_row.extend(columns.iter().map(|(_, ty, _)| ty.to_string())); + type_row.push(output_ty.to_string()); + table.add_row(type_row); + + let mut domain_row = vec!["Domain".to_string()]; + domain_row.extend(input_domains.iter().map(|domain| domain.to_string())); + domain_row.push(output_domain.to_string()); + table.add_row(domain_row); + + for i in 0..output_col.len() { + let mut row = vec![format!("Row {i}")]; + for (_, _, col) in columns.iter() { + let value = col.index(i).unwrap(); + row.push(format!("{}", value)); + } + row.push(format!("{}", output_col.index(i).unwrap())); + table.add_row(row); + } + + writeln!(file, "evaluation:\n{table}").unwrap(); + + let mut table = Table::new(); + table.load_preset("||--+-++| ++++++"); + + table.set_header(&["Column", "Data"]); + + for (name, _, col) in columns.iter() { + table.add_row(&[name.to_string(), format!("{col:?}")]); + } + + table.add_row(["Output".to_string(), format!("{output_col:?}")]); + + writeln!(file, "evaluation (internal):\n{table}").unwrap(); + } + } + write!(file, "\n\n").unwrap(); + } + Err((Some(span), msg)) => { + writeln!(file, "{}\n", span.display_error((text.to_string(), msg))).unwrap(); + } + Err((None, msg)) => { + writeln!(file, "error: {}\n", msg).unwrap(); + } + } +} + +fn 
test_arrow_conversion(col: &Column) { + let arrow_col = col.as_arrow(); + let new_col = Column::from_arrow(&*arrow_col); + assert_eq!(col, &new_col, "arrow conversion went wrong"); +} diff --git a/common/expression/tests/it/kernel.rs b/common/expression/tests/it/kernel.rs new file mode 100644 index 0000000000000..f0f92779f13e5 --- /dev/null +++ b/common/expression/tests/it/kernel.rs @@ -0,0 +1,155 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::io::Write; + +use common_expression::types::nullable::NullableColumn; +use common_expression::types::string::StringColumn; +use common_expression::Chunk; +use common_expression::Column; +use common_expression::Value; +use goldenfile::Mint; + +#[test] +pub fn test_pass() { + let mut mint = Mint::new("tests/it/testdata"); + let mut file = mint.new_goldenfile("kernel-pass.txt").unwrap(); + + run_filter( + &mut file, + Column::Boolean(vec![true, false, false, false, true].into()), + &[ + Column::Int32(vec![0, 1, 2, 3, -4].into()), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), + validity: vec![false, true, false, false, false].into(), + })), + Column::Null { len: 5 }, + Column::Nullable(Box::new(NullableColumn { + column: Column::String(StringColumn { + data: "abcde".as_bytes().to_vec().into(), + offsets: vec![0, 1, 2, 3, 4, 5].into(), + }), + validity: vec![true, true, false, false, false].into(), + })), + ], + ); + + run_filter( + &mut file, + Column::Nullable(Box::new(NullableColumn { + column: Column::Boolean(vec![true, true, false, true, true].into()), + validity: vec![false, true, true, false, false].into(), + })), + &[ + Column::Int32(vec![0, 1, 2, 3, -4].into()), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), + validity: vec![false, true, false, false, false].into(), + })), + Column::Null { len: 5 }, + Column::Nullable(Box::new(NullableColumn { + column: Column::String(StringColumn { + data: "xyzab".as_bytes().to_vec().into(), + offsets: vec![0, 1, 2, 3, 4, 5].into(), + }), + validity: vec![false, true, true, false, false].into(), + })), + ], + ); + + run_concat(&mut file, vec![ + vec![ + Column::Int32(vec![0, 1, 2, 3, -4].into()), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), + validity: vec![false, true, false, false, false].into(), + })), + Column::Null { len: 5 }, + Column::EmptyArray { len: 5 }, + Column::Nullable(Box::new(NullableColumn { + column: Column::String(StringColumn { + data: "xyzab".as_bytes().to_vec().into(), + offsets: vec![0, 1, 2, 3, 4, 5].into(), + }), + validity: vec![false, true, true, false, false].into(), + })), + ], + vec![ + Column::Int32(vec![5, 6].into()), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![15, 16].into()), + validity: vec![false, true].into(), + })), + Column::Null { len: 2 }, + Column::EmptyArray { len: 2 }, + Column::Nullable(Box::new(NullableColumn { + 
column: Column::String(StringColumn { + data: "xy".as_bytes().to_vec().into(), + offsets: vec![0, 1, 2].into(), + }), + validity: vec![false, true].into(), + })), + ], + ]); +} + +fn run_filter(file: &mut impl Write, predicate: Column, columns: &[Column]) { + let len = columns.get(0).map(|c| c.len()).unwrap_or(1); + let columns = columns.iter().map(|c| Value::Column(c.clone())).collect(); + + let chunk = Chunk::new(columns, len); + + let predicate = Value::Column(predicate); + let result = chunk.clone().filter(&predicate); + + match result { + Ok(result_chunk) => { + writeln!(file, "Filter: {predicate:?}").unwrap(); + writeln!(file, "Source:\n{chunk:?}").unwrap(); + writeln!(file, "Result:\n{result_chunk:?}").unwrap(); + write!(file, "\n\n").unwrap(); + } + Err(err) => { + writeln!(file, "error: {}\n", err.message()).unwrap(); + } + } +} + +fn run_concat(file: &mut impl Write, columns: Vec>) { + let chunks: Vec = columns + .iter() + .map(|cs| { + let num_rows = cs.get(0).map(|c| c.len()).unwrap_or(1); + let cs = cs.iter().map(|c| Value::Column(c.clone())).collect(); + Chunk::new(cs, num_rows) + }) + .collect(); + + let result = Chunk::concat(&chunks); + + match result { + Ok(result_chunk) => { + for (i, c) in chunks.iter().enumerate() { + writeln!(file, "Concat-Column {}:", i).unwrap(); + writeln!(file, "{:?}", c).unwrap(); + } + writeln!(file, "Result:\n{result_chunk:?}").unwrap(); + write!(file, "\n\n").unwrap(); + } + Err(err) => { + writeln!(file, "error: {}\n", err.message()).unwrap(); + } + } +} diff --git a/common/expression/tests/it/testdata/kernel-pass.txt b/common/expression/tests/it/testdata/kernel-pass.txt new file mode 100644 index 0000000000000..bd056a8ce6afd --- /dev/null +++ b/common/expression/tests/it/testdata/kernel-pass.txt @@ -0,0 +1,74 @@ +Filter: Column(Boolean([0b___10001])) +Source: ++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, 1, 2, 3, -4])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | +| 2 | Column(Null { len: 5 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [97, 98, 99, 100, 101], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00011] })) | ++-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ +Result: ++-----------+-----------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, -4])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 14]), validity: [0b______00] })) | +| 2 | Column(Null { len: 2 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [97, 101], offsets: [0, 1, 2] }), validity: [0b______01] })) | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------+ + + +Filter: Column(Nullable(NullableColumn { column: 
Boolean([0b___11011]), validity: [0b___00110] })) +Source: ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, 1, 2, 3, -4])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | +| 2 | Column(Null { len: 5 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00110] })) | ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +Result: ++-----------+----------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+----------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([1])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([11]), validity: [0b_______1] })) | +| 2 | Column(Null { len: 1 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [121], offsets: [0, 1] }), validity: [0b_______1] })) | ++-----------+----------------------------------------------------------------------------------------------------------------------------+ + + +Concat-Column 0: ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, 1, 2, 3, -4])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | +| 2 | Column(Null { len: 5 }) | +| 3 | Column(EmptyArray { len: 5 }) | +| 4 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00110] })) | ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +Concat-Column 1: ++-----------+------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([5, 6])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([15, 16]), validity: [0b______10] })) | +| 2 | Column(Null { len: 2 }) | +| 3 | Column(EmptyArray { len: 2 }) | +| 4 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121], offsets: [0, 1, 2] }), validity: [0b______10] })) | ++-----------+------------------------------------------------------------------------------------------------------------------------------------+ +Result: 
++-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, 1, 2, 3, -4, 5, 6])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14, 15, 16]), validity: [0b_1000010] })) | +| 2 | Column(Null { len: 7 }) | +| 3 | Column(EmptyArray { len: 7 }) | +| 4 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98, 120, 121], offsets: [0, 1, 2, 3, 4, 5, 6, 7] }), validity: [0b_1000110] })) | ++-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + From 2b39a32ee1cd1bb61f3fa5127386c8b7e2b1b047 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Mon, 8 Aug 2022 21:51:52 +0800 Subject: [PATCH 03/59] feat(query): add concat/filter kernel for chunk --- common/expression/Cargo.toml | 1 - common/expression/src/kernels/concat.rs | 2 +- common/expression/src/kernels/filter.rs | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/common/expression/Cargo.toml b/common/expression/Cargo.toml index 74105722d406e..6d48d824588f5 100755 --- a/common/expression/Cargo.toml +++ b/common/expression/Cargo.toml @@ -14,7 +14,6 @@ test = false common-arrow = { path = "../arrow" } common-exception = { path = "../exception" } - # Github dependencies # Crates.io dependencies diff --git a/common/expression/src/kernels/concat.rs b/common/expression/src/kernels/concat.rs index 5c617d2df989e..860193dc284f9 100644 --- a/common/expression/src/kernels/concat.rs +++ b/common/expression/src/kernels/concat.rs @@ -87,7 +87,7 @@ impl Column { let mut bitmaps = Vec::with_capacity(columns.len()); let mut inners = Vec::with_capacity(columns.len()); for c in columns { - let nullable_column = NullableType::::try_downcast_column(&c).unwrap(); + let nullable_column = NullableType::::try_downcast_column(c).unwrap(); inners.push(nullable_column.column); bitmaps.push(Column::Boolean(nullable_column.validity)); } diff --git a/common/expression/src/kernels/filter.rs b/common/expression/src/kernels/filter.rs index b987e1e243f62..c306f7e0fb6eb 100644 --- a/common/expression/src/kernels/filter.rs +++ b/common/expression/src/kernels/filter.rs @@ -83,15 +83,15 @@ impl Chunk { if let Some(v) = NullableType::::try_downcast_scalar(&v.as_ref()) { Some(Value::Scalar(v.unwrap_or_default())) } else { - BooleanType::try_downcast_scalar(&v.as_ref()).map(|c| Value::Scalar(c)) + BooleanType::try_downcast_scalar(&v.as_ref()).map(Value::Scalar) } } Value::Column(c) => { - if let Some(nb) = NullableType::::try_downcast_column(&c) { + if let Some(nb) = NullableType::::try_downcast_column(c) { let validity = common_arrow::arrow::bitmap::and(&nb.validity, &nb.column); Some(Value::Column(validity)) } else { - BooleanType::try_downcast_column(&c).map(|c| Value::Column(c)) + BooleanType::try_downcast_column(c).map(Value::Column) } } } From 0c8e2f4ec8cc091bdfb50b4c8a375cf54fc6d143 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Mon, 8 Aug 2022 21:59:28 +0800 Subject: [PATCH 04/59] feat(query): fix lint --- common/expression/src/kernels/concat.rs | 4 ++-- 
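[Editor's note, not part of the patch] A minimal usage sketch of the filter/concat kernels exercised above, assuming the `common_expression` API exactly as introduced by this series; the `example` function and its data are hypothetical:

use common_exception::Result;
use common_expression::Chunk;
use common_expression::Column;
use common_expression::Value;

// Hypothetical helper (editorial): build a small chunk, filter it by a boolean
// predicate column, then concatenate two chunks with the same layout.
fn example() -> Result<()> {
    let chunk = Chunk::new(
        vec![
            Value::Column(Column::Int32(vec![0, 1, 2, 3, -4].into())),
            Value::Column(Column::Boolean(vec![true, false, false, false, true].into())),
        ],
        5,
    );

    // `Chunk::filter` consumes the chunk and keeps the rows whose predicate bit is set.
    let predicate = Value::Column(Column::Boolean(
        vec![true, false, false, false, true].into(),
    ));
    let filtered = chunk.clone().filter(&predicate)?;
    assert_eq!(filtered.num_rows(), 2);

    // `Chunk::concat` stitches chunks with the same column layout back together.
    let merged = Chunk::concat(&[chunk.clone(), chunk])?;
    assert_eq!(merged.num_rows(), 10);
    Ok(())
}

This mirrors what `run_filter` and `run_concat` in `tests/it/kernel.rs` drive against the golden file above.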
common/expression/src/kernels/filter.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/common/expression/src/kernels/concat.rs b/common/expression/src/kernels/concat.rs index 860193dc284f9..d3e256af85244 100644 --- a/common/expression/src/kernels/concat.rs +++ b/common/expression/src/kernels/concat.rs @@ -1,6 +1,6 @@ -// Copyright 2021 Datafuse Labs. +// Copyright 2022 Datafuse Labs. // -// Licensed under the Apach&e License, Version 2.0 (the "License"); +// Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // diff --git a/common/expression/src/kernels/filter.rs b/common/expression/src/kernels/filter.rs index c306f7e0fb6eb..67d3bd59dc954 100644 --- a/common/expression/src/kernels/filter.rs +++ b/common/expression/src/kernels/filter.rs @@ -1,6 +1,6 @@ -// Copyright 2021 Datafuse Labs. +// Copyright 2022 Datafuse Labs. // -// Licensed under the Apach&e License, Version 2.0 (the "License"); +// Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // From 559e5e9421f5cdbeb530d282ed8af414e60e7fbf Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 9 Aug 2022 09:16:26 +0800 Subject: [PATCH 05/59] feat(query): use macro to replace column init builder --- common/expression/src/kernels/concat.rs | 60 ++++++++++++++++++++----- common/expression/src/kernels/filter.rs | 48 ++++++++++++++------ common/expression/src/types/array.rs | 4 ++ 3 files changed, 88 insertions(+), 24 deletions(-) diff --git a/common/expression/src/kernels/concat.rs b/common/expression/src/kernels/concat.rs index d3e256af85244..abae1d8ffb04e 100644 --- a/common/expression/src/kernels/concat.rs +++ b/common/expression/src/kernels/concat.rs @@ -12,11 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_arrow::arrow::bitmap::MutableBitmap; use common_arrow::arrow::buffer::Buffer; use common_exception::ErrorCode; use common_exception::Result; +use crate::types::array::ArrayColumnBuilder; use crate::types::nullable::NullableColumn; +use crate::types::string::StringColumnBuilder; use crate::types::AnyType; use crate::types::ArrayType; use crate::types::BooleanType; @@ -68,6 +71,7 @@ impl Column { if columns.len() == 1 { return columns[0].clone(); } + let capacity = columns.iter().map(|c| c.len()).sum(); with_number_mapped_type!(SRC_TYPE, match &columns[0] { Column::SRC_TYPE(_) => { @@ -78,11 +82,28 @@ impl Column { } NumberType::::upcast_column(Self::concat_primitive_types(&values)) } - Column::Null { .. } => Self::concat_scalar_types::(columns), - Column::EmptyArray { .. } => Self::concat_scalar_types::(columns), - Column::Boolean(_) => Self::concat_scalar_types::(columns), - Column::String(_) => Self::concat_scalar_types::(columns), - Column::Array(_) => Self::concat_scalar_types::>(columns), + Column::Null { .. } => { + let builder: usize = 0; + Self::concat_scalar_types::(builder, columns) + } + Column::EmptyArray { .. 
} => {
+                let builder: usize = 0;
+                Self::concat_scalar_types::<EmptyArrayType>(builder, columns)
+            }
+            Column::Boolean(_) => {
+                let builder = MutableBitmap::with_capacity(capacity);
+                Self::concat_scalar_types::<BooleanType>(builder, columns)
+            }
+            Column::String(_) => {
+                let data_capacity = columns.iter().map(|c| c.memory_size() - c.len() * 8).sum();
+                let builder = StringColumnBuilder::with_capacity(capacity, data_capacity);
+                Self::concat_scalar_types::<StringType>(builder, columns)
+            }
+            Column::Array(col) => {
+                let mut builder = ArrayColumnBuilder::<AnyType>::from_column(col.slice(0..0));
+                builder.reserve(capacity);
+                Self::concat_scalar_types::<ArrayType<AnyType>>(builder, columns)
+            }
             Column::Nullable(_) => {
                 let mut bitmaps = Vec::with_capacity(columns.len());
                 let mut inners = Vec::with_capacity(columns.len());
@@ -91,12 +112,29 @@ impl Column {
                     inners.push(nullable_column.column);
                     bitmaps.push(Column::Boolean(nullable_column.validity));
                 }
+
                 let column = Self::concat(&inners);
-                let validity = Self::concat_scalar_types::<BooleanType>(&bitmaps);
+                let validity_builder = MutableBitmap::with_capacity(capacity);
+                let validity = Self::concat_scalar_types::<BooleanType>(validity_builder, &bitmaps);
                 let validity = BooleanType::try_downcast_column(&validity).unwrap();
+
                 Column::Nullable(Box::new(NullableColumn { column, validity }))
             }
-            Column::Tuple { .. } => Self::concat_scalar_types::<AnyType>(columns),
+            Column::Tuple { fields, .. } => {
+                let fields = (0..fields.len())
+                    .map(|idx| {
+                        let cs: Vec<Column> = columns
+                            .iter()
+                            .map(|col| col.as_tuple().unwrap().0[idx].clone())
+                            .collect();
+                        Self::concat(&cs)
+                    })
+                    .collect();
+                Column::Tuple {
+                    fields,
+                    len: capacity,
+                }
+            }
         })
     }
 
@@ -109,14 +147,14 @@ impl Column {
         results.into()
     }
 
-    fn concat_scalar_types<T: ValueType>(columns: &[Column]) -> Column {
-        let capacity = columns.iter().map(|c| c.len()).sum();
+    fn concat_scalar_types<T: ValueType>(
+        mut builder: T::ColumnBuilder,
+        columns: &[Column],
+    ) -> Column {
         let columns: Vec<T::Column> = columns
             .iter()
             .map(|c| T::try_downcast_column(c).unwrap())
             .collect();
-
-        let mut builder = T::column_init_builder(&columns[0], capacity);
         for col in columns {
             for item in T::iter_column(&col) {
                 T::push_item(&mut builder, item)
diff --git a/common/expression/src/kernels/filter.rs b/common/expression/src/kernels/filter.rs
index 67d3bd59dc954..0159b97ee6750 100644
--- a/common/expression/src/kernels/filter.rs
+++ b/common/expression/src/kernels/filter.rs
@@ -15,11 +15,14 @@
 use common_arrow::arrow::bitmap::utils::BitChunkIterExact;
 use common_arrow::arrow::bitmap::utils::BitChunksExact;
 use common_arrow::arrow::bitmap::Bitmap;
+use common_arrow::arrow::bitmap::MutableBitmap;
 use common_arrow::arrow::buffer::Buffer;
 use common_exception::ErrorCode;
 use common_exception::Result;
 
+use crate::types::array::ArrayColumnBuilder;
 use crate::types::nullable::NullableColumn;
+use crate::types::string::StringColumnBuilder;
 use crate::types::AnyType;
 use crate::types::ArrayType;
 use crate::types::BooleanType;
@@ -100,19 +103,39 @@ impl Chunk {
 
 impl Column {
     pub fn filter(&self, filter: &Bitmap) -> Column {
+        let length = filter.len() - filter.unset_bits();
+        if length == self.len() {
+            return self.clone();
+        }
+
         with_number_type!(SRC_TYPE, match self {
             Column::SRC_TYPE(values) => {
                 Column::SRC_TYPE(Self::filter_primitive_types(values, filter))
             }
-            Column::Null { .. } | Column::EmptyArray { ..
} => - self.slice(0..filter.len() - filter.unset_bits()), - Column::Boolean(bm) => Self::filter_scalar_types::(bm, filter), - Column::String(column) => Self::filter_scalar_types::(column, filter), - Column::Array(column) => - Self::filter_scalar_types::>(column, filter), + Column::Null { .. } | Column::EmptyArray { .. } => self.slice(0..length), + + Column::Boolean(bm) => Self::filter_scalar_types::( + bm, + MutableBitmap::with_capacity(length), + filter + ), + Column::String(column) => Self::filter_scalar_types::( + column, + StringColumnBuilder::with_capacity(length, 0), + filter + ), + Column::Array(column) => { + let mut builder = ArrayColumnBuilder::::from_column(column.slice(0..0)); + builder.reserve(length); + Self::filter_scalar_types::>(column, builder, filter) + } Column::Nullable(c) => { let column = Self::filter(&c.column, filter); - let validity = Self::filter_scalar_types::(&c.validity, filter); + let validity = Self::filter_scalar_types::( + &c.validity, + MutableBitmap::with_capacity(length), + filter, + ); Column::Nullable(Box::new(NullableColumn { column, validity: BooleanType::try_downcast_column(&validity).unwrap(), @@ -126,13 +149,12 @@ impl Column { }) } - fn filter_scalar_types(col: &T::Column, filter: &Bitmap) -> Column { - let length = filter.len() - filter.unset_bits(); - if length == T::column_len(col) { - return T::upcast_column(col.clone()); - } + fn filter_scalar_types( + col: &T::Column, + mut builder: T::ColumnBuilder, + filter: &Bitmap, + ) -> Column { const CHUNK_SIZE: usize = 64; - let mut builder = T::column_init_builder(col, length); let (mut slice, offset, mut length) = filter.as_slice(); let mut start_index: usize = 0; diff --git a/common/expression/src/types/array.rs b/common/expression/src/types/array.rs index f82b70d1bb68b..f27e154780525 100755 --- a/common/expression/src/types/array.rs +++ b/common/expression/src/types/array.rs @@ -249,6 +249,10 @@ impl ArrayColumnBuilder { self.offsets.len() - 1 } + pub fn reserve(&mut self, additional: usize) { + self.offsets.reserve(additional); + } + pub fn push(&mut self, item: T::Column) { let other_col = T::column_to_builder(item); T::append_builder(&mut self.builder, &other_col); From a47ab4a89c69fc35a94dd2bfc25eed2e9a6f9ba9 Mon Sep 17 00:00:00 2001 From: lichuang Date: Tue, 9 Aug 2022 10:58:03 +0800 Subject: [PATCH 06/59] feat: add alter share tenants sql --- common/ast/src/ast/statements/share.rs | 34 +++++++ common/ast/src/ast/statements/statement.rs | 2 + common/ast/src/parser/statement.rs | 18 ++++ common/ast/src/parser/token.rs | 4 + common/ast/tests/it/parser.rs | 3 + common/ast/tests/it/testdata/statement.txt | 90 +++++++++++++++++++ .../interpreters/interpreter_factory_v2.rs | 4 + .../interpreter_share_alter_tenants.rs | 81 +++++++++++++++++ query/src/interpreters/mod.rs | 2 + query/src/sql/planner/binder/ddl/share.rs | 27 ++++++ query/src/sql/planner/binder/mod.rs | 3 + query/src/sql/planner/format/display_plan.rs | 1 + query/src/sql/planner/plans/mod.rs | 3 + query/src/sql/planner/plans/share.rs | 15 ++++ 14 files changed, 287 insertions(+) create mode 100644 query/src/interpreters/interpreter_share_alter_tenants.rs diff --git a/common/ast/src/ast/statements/share.rs b/common/ast/src/ast/statements/share.rs index a64ee1ddc8c70..f53da8a10903a 100644 --- a/common/ast/src/ast/statements/share.rs +++ b/common/ast/src/ast/statements/share.rs @@ -96,3 +96,37 @@ impl Display for RevokeShareObjectStmt<'_> { Ok(()) } } + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AlterShareAccountsStmt<'a> { + 
pub share: Identifier<'a>, + pub if_exists: bool, + pub tenants: Vec>, + pub add: bool, +} + +impl Display for AlterShareAccountsStmt<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "ALTER SHARE ")?; + if self.if_exists { + write!(f, "IF EXISTS ")?; + } + write!(f, "{}", self.share)?; + if self.add { + write!(f, " ADD TENANTS ")?; + } else { + write!(f, " REMOVE TENANTS ")?; + } + let mut first = true; + for account in self.tenants.iter() { + if !first { + write!(f, " , ")?; + } else { + first = false; + } + write!(f, " {} ", account)?; + } + + Ok(()) + } +} diff --git a/common/ast/src/ast/statements/statement.rs b/common/ast/src/ast/statements/statement.rs index a60c7f0cd2af4..eef749eea4879 100644 --- a/common/ast/src/ast/statements/statement.rs +++ b/common/ast/src/ast/statements/statement.rs @@ -163,6 +163,7 @@ pub enum Statement<'a> { DropShare(DropShareStmt<'a>), GrantShareObject(GrantShareObjectStmt<'a>), RevokeShareObject(RevokeShareObjectStmt<'a>), + AlterShareAccounts(AlterShareAccountsStmt<'a>), } #[derive(Debug, Clone, PartialEq)] @@ -368,6 +369,7 @@ impl<'a> Display for Statement<'a> { Statement::DropShare(stmt) => write!(f, "{stmt}")?, Statement::GrantShareObject(stmt) => write!(f, "{stmt}")?, Statement::RevokeShareObject(stmt) => write!(f, "{stmt}")?, + Statement::AlterShareAccounts(stmt) => write!(f, "{stmt}")?, } Ok(()) } diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 70fdfb2ea1ae5..37e528fa5dcb3 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -794,6 +794,19 @@ pub fn statement(i: Input) -> IResult { }) }, ); + let alter_share_accounts = map( + rule! { + ALTER ~ SHARE ~ (IF ~ EXISTS )? ~ #ident ~ #alter_add_share_accounts ~ TENANTS ~ Eq ~ #comma_separated_list1(ident) + }, + |(_, _, opt_if_exists, share, add, _, _, tenants)| { + Statement::AlterShareAccounts(AlterShareAccountsStmt { + share, + if_exists: opt_if_exists.is_some(), + add, + tenants, + }) + }, + ); let statement_body = alt(( rule!( @@ -886,6 +899,7 @@ pub fn statement(i: Input) -> IResult { | #drop_share: "`DROP SHARE [IF EXISTS] `" | #grant_share_object: "`GRANT { USAGE | SELECT | REFERENCE_USAGE } ON { DATABASE db | TABLE db.table } TO SHARE `" | #revoke_share_object: "`REVOKE { USAGE | SELECT | REFERENCE_USAGE } ON { DATABASE db | TABLE db.table } FROM SHARE `" + | #alter_share_accounts: "`ALTER SHARE [IF EXISTS] {ADD | REMOVE} TENANTS = tenant [, tenant, ...]`" ), )); @@ -1054,6 +1068,10 @@ pub fn priv_share_type(i: Input) -> IResult { ))(i) } +pub fn alter_add_share_accounts(i: Input) -> IResult { + alt((value(true, rule! { ADD }), value(false, rule! { REMOVE })))(i) +} + pub fn grant_share_object_name(i: Input) -> IResult { let database = map( rule! { diff --git a/common/ast/src/parser/token.rs b/common/ast/src/parser/token.rs index 0c89329b03f9b..d57bfcd13951b 100644 --- a/common/ast/src/parser/token.rs +++ b/common/ast/src/parser/token.rs @@ -237,6 +237,8 @@ pub enum TokenKind { // reserved list. 
#[token("ALL", ignore(ascii_case))] ALL, + #[token("ADD", ignore(ascii_case))] + ADD, #[token("ANY", ignore(ascii_case))] ANY, #[token("SOME", ignore(ascii_case))] @@ -627,6 +629,8 @@ pub enum TokenKind { TEXT, #[token("TENANTSETTING", ignore(ascii_case))] TENANTSETTING, + #[token("TENANTS", ignore(ascii_case))] + TENANTS, #[token("THEN", ignore(ascii_case))] THEN, #[token("TIMESTAMP", ignore(ascii_case))] diff --git a/common/ast/tests/it/parser.rs b/common/ast/tests/it/parser.rs index a3cca61f5d48e..5e62df6d04106 100644 --- a/common/ast/tests/it/parser.rs +++ b/common/ast/tests/it/parser.rs @@ -264,6 +264,9 @@ fn test_statement() { r#"GRANT SELECT ON TABLE db1.tb1 TO SHARE a;"#, r#"REVOKE USAGE ON DATABASE db1 FROM SHARE a;"#, r#"REVOKE SELECT ON TABLE db1.tb1 FROM SHARE a;"#, + r#"ALTER SHARE a ADD TENANTS = b,c;"#, + r#"ALTER SHARE IF EXISTS a ADD TENANTS = b,c;"#, + r#"ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c;"#, ]; for case in cases { diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index e51587f497cb0..c07f2bb2ce0e3 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -6061,3 +6061,93 @@ RevokeShareObject( ) +---------- Input ---------- +ALTER SHARE a ADD TENANTS = b,c; +---------- Output --------- +ALTER SHARE a ADD TENANTS b , c +---------- AST ------------ +AlterShareAccounts( + AlterShareAccountsStmt { + share: Identifier { + name: "a", + quote: None, + span: Ident(12..13), + }, + if_exists: false, + tenants: [ + Identifier { + name: "b", + quote: None, + span: Ident(28..29), + }, + Identifier { + name: "c", + quote: None, + span: Ident(30..31), + }, + ], + add: true, + }, +) + + +---------- Input ---------- +ALTER SHARE IF EXISTS a ADD TENANTS = b,c; +---------- Output --------- +ALTER SHARE IF EXISTS a ADD TENANTS b , c +---------- AST ------------ +AlterShareAccounts( + AlterShareAccountsStmt { + share: Identifier { + name: "a", + quote: None, + span: Ident(22..23), + }, + if_exists: true, + tenants: [ + Identifier { + name: "b", + quote: None, + span: Ident(38..39), + }, + Identifier { + name: "c", + quote: None, + span: Ident(40..41), + }, + ], + add: true, + }, +) + + +---------- Input ---------- +ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c; +---------- Output --------- +ALTER SHARE IF EXISTS a REMOVE TENANTS b , c +---------- AST ------------ +AlterShareAccounts( + AlterShareAccountsStmt { + share: Identifier { + name: "a", + quote: None, + span: Ident(22..23), + }, + if_exists: true, + tenants: [ + Identifier { + name: "b", + quote: None, + span: Ident(41..42), + }, + Identifier { + name: "c", + quote: None, + span: Ident(43..44), + }, + ], + add: false, + }, +) + + diff --git a/query/src/interpreters/interpreter_factory_v2.rs b/query/src/interpreters/interpreter_factory_v2.rs index fb44e55ae3ae5..3ac5668b4e16a 100644 --- a/query/src/interpreters/interpreter_factory_v2.rs +++ b/query/src/interpreters/interpreter_factory_v2.rs @@ -267,6 +267,10 @@ impl InterpreterFactoryV2 { ctx, *p.clone(), )?)), + Plan::AlterShareAccounts(p) => Ok(Arc::new(AlterShareTenantsInterpreter::try_create( + ctx, + *p.clone(), + )?)), } } } diff --git a/query/src/interpreters/interpreter_share_alter_tenants.rs b/query/src/interpreters/interpreter_share_alter_tenants.rs new file mode 100644 index 0000000000000..f73fa77f0c6c7 --- /dev/null +++ b/query/src/interpreters/interpreter_share_alter_tenants.rs @@ -0,0 +1,81 @@ +// Copyright 2022 Datafuse Labs. 
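[Editor's note, not part of the patch] For reference, the new `alter_share_accounts` grammar rule accepts statements like the two parser test cases quoted below; `ADD` versus `REMOVE` sets the `add` flag on `AlterShareAccountsStmt`, and the tenant list becomes `accounts` on the bound `AlterShareAccountsPlan` that the interpreter added in this commit turns into an `AddShareAccountsReq` or `RemoveShareAccountsReq`:

ALTER SHARE a ADD TENANTS = b,c;
ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c;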
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use common_datavalues::chrono::Utc; +use common_exception::Result; +use common_meta_api::ShareApi; +use common_meta_app::share::AddShareAccountsReq; +use common_meta_app::share::RemoveShareAccountsReq; +use common_meta_app::share::ShareNameIdent; +use common_streams::DataBlockStream; +use common_streams::SendableDataBlockStream; + +use crate::interpreters::Interpreter; +use crate::sessions::QueryContext; +use crate::sessions::TableContext; +use crate::sql::plans::share::AlterShareAccountsPlan; + +pub struct AlterShareTenantsInterpreter { + ctx: Arc, + plan: AlterShareAccountsPlan, +} + +impl AlterShareTenantsInterpreter { + pub fn try_create(ctx: Arc, plan: AlterShareAccountsPlan) -> Result { + Ok(AlterShareTenantsInterpreter { ctx, plan }) + } +} + +#[async_trait::async_trait] +impl Interpreter for AlterShareTenantsInterpreter { + fn name(&self) -> &str { + "AlterShareTenantsInterpreter" + } + + async fn execute(&self) -> Result { + let tenant = self.ctx.get_tenant(); + let user_mgr = self.ctx.get_user_manager(); + let meta_api = user_mgr.get_meta_store_client(); + if self.plan.add { + let req = AddShareAccountsReq { + share_name: ShareNameIdent { + tenant, + share_name: self.plan.share.clone(), + }, + if_exists: self.plan.if_exists, + accounts: self.plan.accounts.clone(), + share_on: Utc::now(), + }; + meta_api.add_share_accounts(req).await?; + } else { + let req = RemoveShareAccountsReq { + share_name: ShareNameIdent { + tenant, + share_name: self.plan.share.clone(), + }, + if_exists: self.plan.if_exists, + accounts: self.plan.accounts.clone(), + }; + meta_api.remove_share_accounts(req).await?; + } + + Ok(Box::pin(DataBlockStream::create( + self.plan.schema(), + None, + vec![], + ))) + } +} diff --git a/query/src/interpreters/mod.rs b/query/src/interpreters/mod.rs index b569a42d01fa0..5fa8eb6640e41 100644 --- a/query/src/interpreters/mod.rs +++ b/query/src/interpreters/mod.rs @@ -49,6 +49,7 @@ mod interpreter_role_revoke; mod interpreter_select; mod interpreter_select_v2; mod interpreter_setting; +mod interpreter_share_alter_tenants; mod interpreter_share_create; mod interpreter_share_drop; mod interpreter_share_grant_object; @@ -133,6 +134,7 @@ pub use interpreter_role_revoke::RevokeRoleInterpreter; pub use interpreter_select::SelectInterpreter; pub use interpreter_select_v2::SelectInterpreterV2; pub use interpreter_setting::SettingInterpreter; +pub use interpreter_share_alter_tenants::AlterShareTenantsInterpreter; pub use interpreter_share_create::CreateShareInterpreter; pub use interpreter_share_drop::DropShareInterpreter; pub use interpreter_share_grant_object::GrantShareObjectInterpreter; diff --git a/query/src/sql/planner/binder/ddl/share.rs b/query/src/sql/planner/binder/ddl/share.rs index 65f4a310bb4ea..7799b483b42fc 100644 --- a/query/src/sql/planner/binder/ddl/share.rs +++ b/query/src/sql/planner/binder/ddl/share.rs @@ -18,6 +18,7 @@ use common_exception::Result; use 
crate::sessions::TableContext; use crate::sql::binder::Binder; use crate::sql::normalize_identifier; +use crate::sql::plans::AlterShareAccountsPlan; use crate::sql::plans::CreateSharePlan; use crate::sql::plans::DropSharePlan; use crate::sql::plans::GrantShareObjectPlan; @@ -101,4 +102,30 @@ impl<'a> Binder { }; Ok(Plan::RevokeShareObject(Box::new(plan))) } + + pub(in crate::sql::planner::binder) async fn bind_alter_share_accounts( + &mut self, + stmt: &AlterShareAccountsStmt<'a>, + ) -> Result { + let AlterShareAccountsStmt { + share, + if_exists, + tenants, + add, + } = stmt; + + let share = normalize_identifier(share, &self.name_resolution_ctx).name; + let mut accounts = vec![]; + for tenant in tenants { + accounts.push(tenant.to_string()); + } + + let plan = AlterShareAccountsPlan { + share, + if_exists: *if_exists, + add: *add, + accounts, + }; + Ok(Plan::AlterShareAccounts(Box::new(plan))) + } } diff --git a/query/src/sql/planner/binder/mod.rs b/query/src/sql/planner/binder/mod.rs index a27b2b2da2fba..b7742b7b38b1c 100644 --- a/query/src/sql/planner/binder/mod.rs +++ b/query/src/sql/planner/binder/mod.rs @@ -311,6 +311,9 @@ impl<'a> Binder { Statement::RevokeShareObject(stmt) => { self.bind_revoke_share_object(stmt).await? } + Statement::AlterShareAccounts(stmt) => { + self.bind_alter_share_accounts(stmt).await? + } }; Ok(plan) } diff --git a/query/src/sql/planner/format/display_plan.rs b/query/src/sql/planner/format/display_plan.rs index cc81b3cfd187a..d0f2c0e5bdaff 100644 --- a/query/src/sql/planner/format/display_plan.rs +++ b/query/src/sql/planner/format/display_plan.rs @@ -98,6 +98,7 @@ impl Plan { Plan::DropShare(p) => Ok(format!("{:?}", p)), Plan::GrantShareObject(p) => Ok(format!("{:?}", p)), Plan::RevokeShareObject(p) => Ok(format!("{:?}", p)), + Plan::AlterShareAccounts(p) => Ok(format!("{:?}", p)), } } } diff --git a/query/src/sql/planner/plans/mod.rs b/query/src/sql/planner/plans/mod.rs index c1e725045af91..ae0078d3b4d9a 100644 --- a/query/src/sql/planner/plans/mod.rs +++ b/query/src/sql/planner/plans/mod.rs @@ -203,6 +203,7 @@ pub enum Plan { DropShare(Box), GrantShareObject(Box), RevokeShareObject(Box), + AlterShareAccounts(Box), } #[derive(Clone)] @@ -277,6 +278,7 @@ impl Display for Plan { Plan::DropShare(_) => write!(f, "DropShare"), Plan::GrantShareObject(_) => write!(f, "GrantShareObject"), Plan::RevokeShareObject(_) => write!(f, "RevokeShareObject"), + Plan::AlterShareAccounts(_) => write!(f, "AlterShareAccounts"), } } } @@ -343,6 +345,7 @@ impl Plan { Plan::DropShare(plan) => plan.schema(), Plan::GrantShareObject(plan) => plan.schema(), Plan::RevokeShareObject(plan) => plan.schema(), + Plan::AlterShareAccounts(plan) => plan.schema(), } } } diff --git a/query/src/sql/planner/plans/share.rs b/query/src/sql/planner/plans/share.rs index ea3662995708a..a18980d50016b 100644 --- a/query/src/sql/planner/plans/share.rs +++ b/query/src/sql/planner/plans/share.rs @@ -105,3 +105,18 @@ impl RevokeShareObjectPlan { Arc::new(DataSchema::empty()) } } + +// Alter Share Accounts Plan +#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct AlterShareAccountsPlan { + pub share: String, + pub if_exists: bool, + pub accounts: Vec, + pub add: bool, +} + +impl AlterShareAccountsPlan { + pub fn schema(&self) -> DataSchemaRef { + Arc::new(DataSchema::empty()) + } +} From 9e1f482211976440f08b23b3951ab20b8fb0a8b1 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 9 Aug 2022 12:10:29 +0800 Subject: [PATCH 07/59] feat(query): add scatter 
take kernels --- common/expression/src/kernels/filter.rs | 4 +- common/expression/src/kernels/mod.rs | 2 + common/expression/src/kernels/scatter.rs | 187 ++++++++++++++++++ common/expression/src/kernels/take.rs | 125 ++++++++++++ common/expression/src/types.rs | 8 + common/expression/src/types/any.rs | 7 + common/expression/src/types/array.rs | 12 +- common/expression/src/types/boolean.rs | 7 + common/expression/src/types/empty_array.rs | 6 + common/expression/src/types/generic.rs | 7 + common/expression/src/types/map.rs | 24 +++ common/expression/src/types/null.rs | 6 + common/expression/src/types/nullable.rs | 18 ++ common/expression/src/types/number.rs | 7 + common/expression/src/types/string.rs | 12 +- common/expression/tests/it/kernel.rs | 80 ++++++++ .../tests/it/testdata/kernel-pass.txt | 60 ++++++ 17 files changed, 568 insertions(+), 4 deletions(-) create mode 100644 common/expression/src/kernels/scatter.rs create mode 100644 common/expression/src/kernels/take.rs diff --git a/common/expression/src/kernels/filter.rs b/common/expression/src/kernels/filter.rs index 0159b97ee6750..252bac6c71c41 100644 --- a/common/expression/src/kernels/filter.rs +++ b/common/expression/src/kernels/filter.rs @@ -52,7 +52,7 @@ impl Chunk { if s { Ok(self) } else { - Ok(Chunk::empty()) + Ok(self.slice(0..0)) } } Value::Column(bitmap) => { @@ -61,7 +61,7 @@ impl Chunk { 0 => Ok(self), _ => { if count_zeros == self.num_rows() { - return Ok(Chunk::empty()); + return Ok(self.slice(0..0)); } let mut after_columns = Vec::with_capacity(self.num_columns()); for value in self.columns() { diff --git a/common/expression/src/kernels/mod.rs b/common/expression/src/kernels/mod.rs index a02c5fe571148..e33109c9e5b39 100644 --- a/common/expression/src/kernels/mod.rs +++ b/common/expression/src/kernels/mod.rs @@ -14,3 +14,5 @@ mod concat; mod filter; +mod scatter; +mod take; diff --git a/common/expression/src/kernels/scatter.rs b/common/expression/src/kernels/scatter.rs new file mode 100644 index 0000000000000..2442ca985b155 --- /dev/null +++ b/common/expression/src/kernels/scatter.rs @@ -0,0 +1,187 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
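[Editor's note, not part of the patch] A minimal sketch of how the scatter/take kernels added below might be driven, assuming the API exactly as introduced in this commit; the `example` function and its data are hypothetical:

use common_exception::Result;
use common_expression::Chunk;
use common_expression::Column;
use common_expression::Value;

// Hypothetical helper (editorial): gather rows by index with `take`,
// then split the chunk into two output chunks with `scatter`.
fn example() -> Result<()> {
    let chunk = Chunk::new(
        vec![Value::Column(Column::UInt8(vec![10, 11, 12, 13].into()))],
        4,
    );

    // `take` gathers rows by position; indices may repeat or reorder rows.
    let taken = chunk.clone().take(&[3u32, 0, 0])?;
    assert_eq!(taken.num_rows(), 3);

    // `scatter` routes row i to the output chunk numbered indices[i],
    // producing `scatter_size` chunks in total.
    let parts = chunk.scatter(&[0u32, 1, 0, 1], 2)?;
    assert_eq!(parts.len(), 2);
    assert_eq!(parts[0].num_rows(), 2);
    Ok(())
}

With indices `[0, 1, 0, 1]` and a scatter size of 2, rows 0 and 2 land in the first output chunk and rows 1 and 3 in the second, so each output holds two rows.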
+
+use common_arrow::arrow::bitmap::MutableBitmap;
+use common_arrow::arrow::types::Index;
+use common_exception::Result;
+
+use crate::types::array::ArrayColumnBuilder;
+use crate::types::nullable::NullableColumn;
+use crate::types::string::StringColumnBuilder;
+use crate::types::AnyType;
+use crate::types::ArrayType;
+use crate::types::BooleanType;
+use crate::types::NumberType;
+use crate::types::StringType;
+use crate::types::ValueType;
+use crate::with_number_mapped_type;
+use crate::Chunk;
+use crate::Column;
+use crate::Scalar;
+use crate::Value;
+
+impl Chunk {
+    pub fn scatter<I: Index>(&self, indices: &[I], scatter_size: usize) -> Result<Vec<Self>> {
+        let columns_size = self.num_columns();
+        let mut scattered_columns: Vec<Vec<Column>> = Vec::with_capacity(scatter_size);
+
+        for column_index in 0..columns_size {
+            match &self.columns()[column_index] {
+                Value::Scalar(s) => {
+                    scattered_columns.push(Column::scatter_repeat_scalars::<I>(
+                        s,
+                        indices,
+                        scatter_size,
+                    ));
+                }
+                Value::Column(c) => {
+                    let cs = c.scatter(indices, scatter_size);
+                    scattered_columns.push(cs);
+                }
+            }
+        }
+
+        let mut scattered_chunks = Vec::with_capacity(scatter_size);
+        for index in 0..scatter_size {
+            let mut chunk_columns = vec![];
+            let mut size = 0;
+            for item in scattered_columns.iter() {
+                size = item[index].len();
+                chunk_columns.push(Value::Column(item[index].clone()));
+            }
+            scattered_chunks.push(Chunk::new(chunk_columns, size));
+        }
+
+        Ok(scattered_chunks)
+    }
+}
+
+impl Column {
+    pub fn scatter_repeat_scalars<I: Index>(
+        scalar: &Scalar,
+        indices: &[I],
+        scatter_size: usize,
+    ) -> Vec<Column> {
+        let mut vs = vec![0usize; scatter_size];
+        for index in indices {
+            vs[index.to_usize()] += 1;
+        }
+        vs.iter()
+            .map(|count| scalar.as_ref().repeat(*count).build())
+            .collect()
+    }
+
+    pub fn scatter<I: Index>(&self, indices: &[I], scatter_size: usize) -> Vec<Self> {
+        let length = indices.len();
+        with_number_mapped_type!(SRC_TYPE, match self {
+            Column::SRC_TYPE(values) => Self::scatter_scalars::<NumberType<SRC_TYPE>, _>(
+                values,
+                Vec::with_capacity(length),
+                indices,
+                scatter_size
+            ),
+            Column::Null { .. } => {
+                Self::scatter_repeat_scalars::<I>(&Scalar::Null, indices, scatter_size)
+            }
+            Column::EmptyArray { .. } => {
+                Self::scatter_repeat_scalars::<I>(&Scalar::EmptyArray, indices, scatter_size)
+            }
+            Column::Boolean(bm) => Self::scatter_scalars::<BooleanType, _>(
+                bm,
+                MutableBitmap::with_capacity(length),
+                indices,
+                scatter_size
+            ),
+
+            Column::String(column) => Self::scatter_scalars::<StringType, _>(
+                column,
+                StringColumnBuilder::with_capacity(length, 0),
+                indices,
+                scatter_size
+            ),
+            Column::Array(column) => {
+                let mut builder = ArrayColumnBuilder::<AnyType>::from_column(column.slice(0..0));
+                builder.reserve(length);
+                Self::scatter_scalars::<ArrayType<AnyType>, _>(
+                    column,
+                    builder,
+                    indices,
+                    scatter_size,
+                )
+            }
+            Column::Nullable(c) => {
+                let columns = c.column.scatter(indices, scatter_size);
+                let validitys = Self::scatter_scalars::<BooleanType, _>(
+                    &c.validity,
+                    MutableBitmap::with_capacity(length),
+                    indices,
+                    scatter_size,
+                );
+                columns
+                    .iter()
+                    .zip(validitys.iter())
+                    .map(|(column, validity)| {
+                        Column::Nullable(Box::new(NullableColumn {
+                            column: column.clone(),
+                            validity: BooleanType::try_downcast_column(validity).unwrap(),
+                        }))
+                    })
+                    .collect()
+            }
+            Column::Tuple { fields, .. } => {
+                let fields_vs: Vec<Vec<Column>> = fields
+                    .iter()
+                    .map(|c| c.scatter(indices, scatter_size))
+                    .collect();
+
+                (0..scatter_size)
+                    .map(|index| {
+                        let mut columns = Vec::with_capacity(fields.len());
+                        let mut len = 0;
+                        for field in fields_vs.iter() {
+                            len = field[index].len();
+                            columns.push(field[index].clone());
+                        }
+
+                        Column::Tuple {
+                            fields: columns,
+                            len,
+                        }
+                    })
+                    .collect()
+            }
+        })
+    }
+
+    fn scatter_scalars<T: ValueType, I: Index>(
+        col: &T::Column,
+        builder: T::ColumnBuilder,
+        indices: &[I],
+        scatter_size: usize,
+    ) -> Vec<Column> {
+        let mut builders: Vec<T::ColumnBuilder> =
+            std::iter::repeat(builder).take(scatter_size).collect();
+
+        indices
+            .iter()
+            .zip(T::iter_column(col))
+            .for_each(|(index, item)| {
+                T::push_item(&mut builders[index.to_usize()], item);
+            });
+        builders
+            .into_iter()
+            .map(|b| T::upcast_column(T::build_column(b)))
+            .collect()
+    }
+}
diff --git a/common/expression/src/kernels/take.rs b/common/expression/src/kernels/take.rs
new file mode 100644
index 0000000000000..bc4548b701063
--- /dev/null
+++ b/common/expression/src/kernels/take.rs
@@ -0,0 +1,125 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use common_arrow::arrow::bitmap::MutableBitmap;
+use common_arrow::arrow::buffer::Buffer;
+use common_arrow::arrow::types::Index;
+use common_exception::Result;
+
+use crate::types::array::ArrayColumnBuilder;
+use crate::types::nullable::NullableColumn;
+use crate::types::string::StringColumnBuilder;
+use crate::types::AnyType;
+use crate::types::ArrayType;
+use crate::types::BooleanType;
+use crate::types::StringType;
+use crate::types::ValueType;
+use crate::with_number_type;
+use crate::Chunk;
+use crate::Column;
+use crate::Value;
+
+impl Chunk {
+    pub fn take<I: Index>(self, indices: &[I]) -> Result<Self> {
+        if indices.is_empty() {
+            return Ok(self.slice(0..0));
+        }
+
+        let mut after_columns = Vec::with_capacity(self.num_columns());
+        for value in self.columns() {
+            match value {
+                Value::Scalar(v) => after_columns.push(Value::Scalar(v.clone())),
+                Value::Column(c) => after_columns.push(Value::Column(Column::take(c, indices))),
+            }
+        }
+        Ok(Chunk::new(after_columns, indices.len()))
+    }
+}
+
+impl Column {
+    pub fn take<I: Index>(&self, indices: &[I]) -> Self {
+        let length = indices.len();
+        with_number_type!(SRC_TYPE, match self {
+            Column::SRC_TYPE(values) => {
+                Column::SRC_TYPE(Self::take_primitives(values, indices))
+            }
+            Column::Null { .. } | Column::EmptyArray { ..
} => self.slice(0..length),
+
+            Column::Boolean(bm) => Self::take_scalars::<BooleanType, _>(
+                bm,
+                MutableBitmap::with_capacity(length),
+                indices
+            ),
+            Column::String(column) => Self::take_scalars::<StringType, _>(
+                column,
+                StringColumnBuilder::with_capacity(length, 0),
+                indices
+            ),
+            Column::Array(column) => {
+                let mut builder = ArrayColumnBuilder::<AnyType>::from_column(column.slice(0..0));
+                builder.reserve(length);
+                Self::take_scalars::<ArrayType<AnyType>, _>(column, builder, indices)
+            }
+            Column::Nullable(c) => {
+                let column = c.column.take(indices);
+                let validity = Self::take_scalars::<BooleanType, _>(
+                    &c.validity,
+                    MutableBitmap::with_capacity(length),
+                    indices,
+                );
+                Column::Nullable(Box::new(NullableColumn {
+                    column,
+                    validity: BooleanType::try_downcast_column(&validity).unwrap(),
+                }))
+            }
+            Column::Tuple { fields, .. } => {
+                let fields = fields.iter().map(|c| c.take(indices)).collect();
+                Column::Tuple {
+                    fields,
+                    len: indices.len(),
+                }
+            }
+        })
+    }
+
+    fn take_scalars<T: ValueType, I: Index>(
+        col: &T::Column,
+        mut builder: T::ColumnBuilder,
+        indices: &[I],
+    ) -> Column {
+        unsafe {
+            for index in indices {
+                T::push_item(
+                    &mut builder,
+                    T::index_column_unchecked(col, index.to_usize()),
+                )
+            }
+        }
+        T::upcast_column(T::build_column(builder))
+    }
+
+    fn take_primitives<T: Copy, I: Index>(col: &Buffer<T>, indices: &[I]) -> Buffer<T> {
+        let mut vs: Vec<T> = Vec::with_capacity(indices.len());
+        let mut dst = vs.as_mut_ptr();
+        for index in indices {
+            unsafe {
+                let e = col[index.to_usize()];
+                dst.write(e);
+                dst = dst.add(1);
+            }
+        }
+        unsafe { vs.set_len(indices.len()) };
+        vs.into()
+    }
+}
diff --git a/common/expression/src/types.rs b/common/expression/src/types.rs
index 2edd990f4daa3..c11c898208b4f 100755
--- a/common/expression/src/types.rs
+++ b/common/expression/src/types.rs
@@ -94,6 +94,14 @@ pub trait ValueType: Debug + Clone + PartialEq + Sized + 'static {
     fn column_len<'a>(col: &'a Self::Column) -> usize;
     fn index_column<'a>(col: &'a Self::Column, index: usize) -> Option<Self::ScalarRef<'a>>;
+
+    /// # Safety
+    ///
+    /// Calling this method with an out-of-bounds index is *[undefined behavior]*
+    unsafe fn index_column_unchecked<'a>(
+        col: &'a Self::Column,
+        index: usize,
+    ) -> Self::ScalarRef<'a>;
     fn slice_column<'a>(col: &'a Self::Column, range: Range<usize>) -> Self::Column;
     fn iter_column<'a>(col: &'a Self::Column) -> Self::ColumnIterator<'a>;
     fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder;
diff --git a/common/expression/src/types/any.rs b/common/expression/src/types/any.rs
index 7d65e8710ec7d..14fc1141db615 100755
--- a/common/expression/src/types/any.rs
+++ b/common/expression/src/types/any.rs
@@ -73,6 +73,13 @@ impl ValueType for AnyType {
         col.index(index)
     }
+
+    unsafe fn index_column_unchecked<'a>(
+        col: &'a Self::Column,
+        index: usize,
+    ) -> Self::ScalarRef<'a> {
+        col.index(index).unwrap()
+    }
+
     fn slice_column<'a>(col: &'a Self::Column, range: Range<usize>) -> Self::Column {
         col.slice(range)
     }
diff --git a/common/expression/src/types/array.rs b/common/expression/src/types/array.rs
index f27e154780525..5fca30e83501c 100755
--- a/common/expression/src/types/array.rs
+++ b/common/expression/src/types/array.rs
@@ -86,6 +86,13 @@ impl<T: ValueType> ValueType for ArrayType<T> {
         col.index(index)
     }
+
+    unsafe fn index_column_unchecked<'a>(
+        col: &'a Self::Column,
+        index: usize,
+    ) -> Self::ScalarRef<'a> {
+        col.index_unchecked(index)
+    }
+
     fn slice_column<'a>(col: &'a Self::Column, range: Range<usize>) -> Self::Column {
         col.slice(range)
     }
@@ -155,7 +162,10 @@ impl<T: ValueType> ArrayColumn<T> {
         ))
     }
 
-    pub fn index_unchecked(&self, index: usize) -> T::Column {
+    /// # Safety
+    ///
+    /// Calling this method with an out-of-bounds index is *[undefined behavior]*
+    pub unsafe fn
index_unchecked(&self, index: usize) -> T::Column { T::slice_column( &self.values, (self.offsets[index] as usize)..(self.offsets[index + 1] as usize), diff --git a/common/expression/src/types/boolean.rs b/common/expression/src/types/boolean.rs index 46efac465c354..8683fadd25fc6 100644 --- a/common/expression/src/types/boolean.rs +++ b/common/expression/src/types/boolean.rs @@ -85,6 +85,13 @@ impl ValueType for BooleanType { col.get(index) } + unsafe fn index_column_unchecked<'a>( + col: &'a Self::Column, + index: usize, + ) -> Self::ScalarRef<'a> { + col.get_bit_unchecked(index) + } + fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { col.clone().slice(range.start, range.end - range.start) } diff --git a/common/expression/src/types/empty_array.rs b/common/expression/src/types/empty_array.rs index b38d71d5df0f3..52547af9679ba 100644 --- a/common/expression/src/types/empty_array.rs +++ b/common/expression/src/types/empty_array.rs @@ -83,6 +83,12 @@ impl ValueType for EmptyArrayType { if index < *len { Some(()) } else { None } } + unsafe fn index_column_unchecked<'a>( + _len: &'a Self::Column, + _index: usize, + ) -> Self::ScalarRef<'a> { + } + fn slice_column<'a>(len: &'a Self::Column, range: Range) -> Self::Column { assert!(range.end <= *len, "range {range:?} out of 0..{len}"); range.end - range.start diff --git a/common/expression/src/types/generic.rs b/common/expression/src/types/generic.rs index f62d7af8378e5..675c58f703780 100755 --- a/common/expression/src/types/generic.rs +++ b/common/expression/src/types/generic.rs @@ -76,6 +76,13 @@ impl ValueType for GenericType { col.index(index) } + unsafe fn index_column_unchecked<'a>( + col: &'a Self::Column, + index: usize, + ) -> Self::ScalarRef<'a> { + col.index(index).unwrap() + } + fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { col.slice(range) } diff --git a/common/expression/src/types/map.rs b/common/expression/src/types/map.rs index 559a788d2476d..342fbb71b25d6 100755 --- a/common/expression/src/types/map.rs +++ b/common/expression/src/types/map.rs @@ -98,6 +98,13 @@ impl ValueType for KvPair { col.index(index) } + unsafe fn index_column_unchecked<'a>( + col: &'a Self::Column, + index: usize, + ) -> Self::ScalarRef<'a> { + col.index_unchecked(index) + } + fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { col.slice(range) } @@ -165,6 +172,16 @@ impl KvColumn { )) } + /// # Safety + /// + /// Calling this method with an out-of-bounds index is *[undefined behavior]* + pub unsafe fn index_unchecked(&self, index: usize) -> (K::ScalarRef<'_>, V::ScalarRef<'_>) { + ( + K::index_column_unchecked(&self.keys, index), + V::index_column_unchecked(&self.values, index), + ) + } + fn slice(&self, range: Range) -> Self { KvColumn { keys: K::slice_column(&self.keys, range.clone()), @@ -309,6 +326,13 @@ impl ValueType for MapType { as ValueType>::index_column(col, index) } + unsafe fn index_column_unchecked<'a>( + col: &'a Self::Column, + index: usize, + ) -> Self::ScalarRef<'a> { + as ValueType>::index_column_unchecked(col, index) + } + fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { as ValueType>::slice_column(col, range) } diff --git a/common/expression/src/types/null.rs b/common/expression/src/types/null.rs index 7d359236c984b..68f10ee2e4308 100644 --- a/common/expression/src/types/null.rs +++ b/common/expression/src/types/null.rs @@ -90,6 +90,12 @@ impl ValueType for NullType { if index < *len { Some(()) } else { None } } + unsafe fn 
index_column_unchecked<'a>( + _col: &'a Self::Column, + _index: usize, + ) -> Self::ScalarRef<'a> { + } + fn slice_column<'a>(len: &'a Self::Column, range: Range) -> Self::Column { assert!(range.start < *len, "range {range:?} out of 0..{len}"); range.end - range.start diff --git a/common/expression/src/types/nullable.rs b/common/expression/src/types/nullable.rs index 10491b6486c10..7db510df1e7ca 100755 --- a/common/expression/src/types/nullable.rs +++ b/common/expression/src/types/nullable.rs @@ -100,6 +100,13 @@ impl ValueType for NullableType { col.index(index) } + unsafe fn index_column_unchecked<'a>( + col: &'a Self::Column, + index: usize, + ) -> Self::ScalarRef<'a> { + col.index_unchecked(index) + } + fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { col.slice(range) } @@ -173,6 +180,17 @@ impl NullableColumn { } } + /// # Safety + /// + /// Calling this method with an out-of-bounds index is *[undefined behavior]* + pub unsafe fn index_unchecked(&self, index: usize) -> Option> { + match self.validity.get(index) { + Some(true) => Some(T::index_column(&self.column, index).unwrap()), + Some(false) => None, + _ => None, + } + } + pub fn slice(&self, range: Range) -> Self { NullableColumn { validity: self diff --git a/common/expression/src/types/number.rs b/common/expression/src/types/number.rs index 357607bf10d73..4c08b848b5f94 100644 --- a/common/expression/src/types/number.rs +++ b/common/expression/src/types/number.rs @@ -97,6 +97,13 @@ impl ValueType for NumberType { col.get(index).cloned() } + unsafe fn index_column_unchecked<'a>( + col: &'a Self::Column, + index: usize, + ) -> Self::ScalarRef<'a> { + *col.get_unchecked(index) + } + fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { col.clone().slice(range.start, range.end - range.start) } diff --git a/common/expression/src/types/string.rs b/common/expression/src/types/string.rs index d887d811edd1d..5ec5c5cab0712 100644 --- a/common/expression/src/types/string.rs +++ b/common/expression/src/types/string.rs @@ -80,6 +80,13 @@ impl ValueType for StringType { col.index(index) } + unsafe fn index_column_unchecked<'a>( + col: &'a Self::Column, + index: usize, + ) -> Self::ScalarRef<'a> { + col.index_unchecked(index) + } + fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column { col.slice(range) } @@ -158,7 +165,10 @@ impl StringColumn { } } - pub fn index_unchecked(&self, index: usize) -> &[u8] { + /// # Safety + /// + /// Calling this method with an out-of-bounds index is *[undefined behavior]* + pub unsafe fn index_unchecked(&self, index: usize) -> &[u8] { &self.data[(self.offsets[index] as usize)..(self.offsets[index + 1] as usize)] } diff --git a/common/expression/tests/it/kernel.rs b/common/expression/tests/it/kernel.rs index f0f92779f13e5..6ab593d1e69a5 100644 --- a/common/expression/tests/it/kernel.rs +++ b/common/expression/tests/it/kernel.rs @@ -103,6 +103,43 @@ pub fn test_pass() { })), ], ]); + + run_take(&mut file, &[0, 3, 1], &[ + Column::Int32(vec![0, 1, 2, 3, -4].into()), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), + validity: vec![false, true, false, false, false].into(), + })), + Column::Null { len: 5 }, + Column::Nullable(Box::new(NullableColumn { + column: Column::String(StringColumn { + data: "xyzab".as_bytes().to_vec().into(), + offsets: vec![0, 1, 2, 3, 4, 5].into(), + }), + validity: vec![false, true, true, false, false].into(), + })), + ]); + + run_scatter( + &mut file, + &[ + 
Column::Int32(vec![0, 1, 2, 3, -4].into()), + Column::Nullable(Box::new(NullableColumn { + column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), + validity: vec![false, true, false, false, false].into(), + })), + Column::Null { len: 5 }, + Column::Nullable(Box::new(NullableColumn { + column: Column::String(StringColumn { + data: "xyzab".as_bytes().to_vec().into(), + offsets: vec![0, 1, 2, 3, 4, 5].into(), + }), + validity: vec![false, true, true, false, false].into(), + })), + ], + &[0, 0, 1, 2, 1], + 3, + ); } fn run_filter(file: &mut impl Write, predicate: Column, columns: &[Column]) { @@ -153,3 +190,46 @@ fn run_concat(file: &mut impl Write, columns: Vec>) { } } } + +fn run_take(file: &mut impl Write, indices: &[u32], columns: &[Column]) { + let len = columns.get(0).map(|c| c.len()).unwrap_or(1); + let columns = columns.iter().map(|c| Value::Column(c.clone())).collect(); + let chunk = Chunk::new(columns, len); + + let result = Chunk::take(chunk.clone(), indices); + + match result { + Ok(result_chunk) => { + writeln!(file, "Take: {indices:?}").unwrap(); + writeln!(file, "Source:\n{chunk:?}").unwrap(); + writeln!(file, "Result:\n{result_chunk:?}").unwrap(); + write!(file, "\n\n").unwrap(); + } + Err(err) => { + writeln!(file, "error: {}\n", err.message()).unwrap(); + } + } +} + +fn run_scatter(file: &mut impl Write, columns: &[Column], indices: &[u32], scatter_size: usize) { + let len = columns.get(0).map(|c| c.len()).unwrap_or(1); + let columns = columns.iter().map(|c| Value::Column(c.clone())).collect(); + let chunk = Chunk::new(columns, len); + + let result = Chunk::scatter(&chunk, indices, scatter_size); + + match result { + Ok(result_chunk) => { + writeln!(file, "Scatter: {indices:?}").unwrap(); + writeln!(file, "Source:\n{chunk:?}").unwrap(); + + for (i, c) in result_chunk.iter().enumerate() { + writeln!(file, "Result-{i}:\n{c:?}").unwrap(); + } + write!(file, "\n\n").unwrap(); + } + Err(err) => { + writeln!(file, "error: {}\n", err.message()).unwrap(); + } + } +} diff --git a/common/expression/tests/it/testdata/kernel-pass.txt b/common/expression/tests/it/testdata/kernel-pass.txt index bd056a8ce6afd..c03b43427a17f 100644 --- a/common/expression/tests/it/testdata/kernel-pass.txt +++ b/common/expression/tests/it/testdata/kernel-pass.txt @@ -72,3 +72,63 @@ Result: +-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +Take: [0, 3, 1] +Source: ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, 1, 2, 3, -4])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | +| 2 | Column(Null { len: 5 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00110] })) | ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +Result: 
++-----------+-------------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+-------------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, 3, 1])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 13, 11]), validity: [0b_____100] })) | +| 2 | Column(Null { len: 3 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 97, 121], offsets: [0, 1, 2, 3] }), validity: [0b_____100] })) | ++-----------+-------------------------------------------------------------------------------------------------------------------------------------------+ + + +Scatter: [0, 0, 1, 2, 1] +Source: ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, 1, 2, 3, -4])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | +| 2 | Column(Null { len: 5 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00110] })) | ++-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ +Result-0: ++-----------+------------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+------------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([0, 1])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11]), validity: [0b______10] })) | +| 2 | Column(Null { len: 2 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121], offsets: [0, 1, 2] }), validity: [0b______10] })) | ++-----------+------------------------------------------------------------------------------------------------------------------------------------+ +Result-1: ++-----------+-----------------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([2, -4])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([12, 14]), validity: [0b______00] })) | +| 2 | Column(Null { len: 2 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [122, 98], offsets: [0, 1, 2] }), validity: [0b______01] })) | ++-----------+-----------------------------------------------------------------------------------------------------------------------------------+ +Result-2: ++-----------+---------------------------------------------------------------------------------------------------------------------------+ +| Column ID | Column Data | 
++-----------+---------------------------------------------------------------------------------------------------------------------------+ +| 0 | Column(Int32([3])) | +| 1 | Column(Nullable(NullableColumn { column: UInt8([13]), validity: [0b_______0] })) | +| 2 | Column(Null { len: 1 }) | +| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [97], offsets: [0, 1] }), validity: [0b_______0] })) | ++-----------+---------------------------------------------------------------------------------------------------------------------------+ + + From 4b1a143fabde954d8b7f83b9693f6d4162f0c332 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 9 Aug 2022 13:17:03 +0800 Subject: [PATCH 08/59] feat(query): parquet schema case insenstive match --- common/formats/src/format_parquet.rs | 54 ++--- tests/fuzz/fuzz.py | 1 - .../base/02_function/02_0057_function_nullif | 3 +- .../base/02_function/02_0058_function_ifnull | 3 +- .../02_0059_function_is_distinct_from | 3 +- .../base/03_dml/03_0014_insert_into_select_v2 | 3 +- .../base/03_dml/03_0016_insert_into_values_v2 | 3 +- .../base/03_dml/03_0017_insert_overwrite_v2 | 3 +- .../03_dml/03_0018_insert_into_variant_v2 | 3 +- .../suites/base/03_dml/03_0025_delete_from_v2 | 3 +- .../suites/base/04_explain/04_0002_explain_v2 | 3 +- .../base/05_ddl/05_0000_ddl_create_tables_v2 | 3 +- .../base/05_ddl/05_0000_ddl_drop_tables_v2 | 3 +- .../05_ddl/05_0001_ddl_create_database_v2 | 3 +- .../base/05_ddl/05_0001_ddl_drop_database_v2 | 3 +- .../05_ddl/05_0001_ddl_drop_table_full_v2 | 3 +- .../base/05_ddl/05_0003_ddl_alter_database_v2 | 3 +- .../base/05_ddl/05_0003_ddl_alter_tabld_v2 | 3 +- .../base/05_ddl/05_0003_ddl_rename_table_v2 | 3 +- .../base/05_ddl/05_0003_ddl_truncate_table_v2 | 3 +- .../base/05_ddl/05_0004_ddl_create_user_v2 | 3 +- .../05_ddl/05_0006_ddl_grant_privilege_v2 | 3 +- .../base/05_ddl/05_0014_ddl_create_role_v2 | 3 +- .../base/05_ddl/05_0015_ddl_drop_role_v2 | 3 +- .../suites/base/05_ddl/05_0016_ddl_stage_v2 | 3 +- .../base/05_ddl/05_0017_ddl_grant_role_v2 | 3 +- .../base/05_ddl/05_0019_ddl_create_view_v2 | 3 +- .../base/05_ddl/05_0020_ddl_drop_view_v2 | 3 +- .../base/05_ddl/05_0021_ddl_alter_view_v2 | 3 +- .../base/06_show/06_0005_show_functions_v2 | 3 +- .../suites/base/06_show/06_0007_show_roles_v2 | 3 +- .../09_fuse_engine/09_0001_remote_insert_v2 | 3 +- .../09_0004_remote_insert_into_select_v2 | 3 +- .../09_0005_remote_insert_into_select_v2 | 3 +- .../09_0008_fuse_optimize_table | 3 +- .../09_0010_remote_insert_overwrite_v2 | 3 +- .../base/09_fuse_engine/09_0018_min_max_index | 3 +- .../suites/base/15_query/aggregate.test | 3 +- .../15_query/alias/having_with_alias.test | 3 +- .../base/15_query/functions/arguments.test | 3 +- .../suites/base/15_query/functions/cast.test | 3 +- .../suites/base/15_query/having.test | 3 +- .../logictest/suites/base/15_query/order.test | 3 +- .../suites/base/15_query/render_result.test | 3 +- .../suites/base/15_query/select.test | 3 +- .../suites/base/15_query/subquery.test | 3 +- .../suites/base/20+_others/20_0001_planner_v2 | 6 +- .../20_0002_planner_v2_display_error | 3 +- tests/logictest/suites/ydb/select1-1.test | 3 +- tests/logictest/suites/ydb/select1-2.test | 3 +- tests/logictest/suites/ydb/select1-3.test | 3 +- tests/logictest/suites/ydb/select1-4.test | 3 +- tests/logictest/suites/ydb/select1-5.test | 3 +- tests/logictest/suites/ydb/select2-1.test | 3 +- tests/logictest/suites/ydb/select2-2.test | 3 +- tests/logictest/suites/ydb/select2-3.test | 3 +- 
tests/logictest/suites/ydb/select2-4.test | 3 +- tests/logictest/suites/ydb/select2-5.test | 3 +- tests/logictest/suites/ydb/select3-1.test | 3 +- tests/logictest/suites/ydb/select3-10.test | 3 +- tests/logictest/suites/ydb/select3-11.test | 3 +- tests/logictest/suites/ydb/select3-12.test | 3 +- tests/logictest/suites/ydb/select3-13.test | 3 +- tests/logictest/suites/ydb/select3-14.test | 3 +- tests/logictest/suites/ydb/select3-15.test | 3 +- tests/logictest/suites/ydb/select3-2.test | 3 +- tests/logictest/suites/ydb/select3-3.test | 3 +- tests/logictest/suites/ydb/select3-4.test | 3 +- tests/logictest/suites/ydb/select3-5.test | 3 +- tests/logictest/suites/ydb/select3-6.test | 3 +- tests/logictest/suites/ydb/select3-7.test | 3 +- tests/logictest/suites/ydb/select3-8.test | 3 +- tests/logictest/suites/ydb/select3-9.test | 3 +- .../04_explain/04_0002_explain_v2.sql | 2 +- .../12_0004_time_travel_select_at.sh | 5 +- .../suites/0_stateless/13_tpch/13_0001_q1.sql | 2 +- .../suites/0_stateless/13_tpch/13_0002_q2.sql | 2 +- .../suites/0_stateless/13_tpch/13_0003_q3.sql | 2 +- .../suites/0_stateless/13_tpch/13_0004_q4.sql | 2 +- .../suites/0_stateless/13_tpch/13_0005_q5.sql | 2 +- .../suites/0_stateless/13_tpch/13_0007_q7.sql | 2 +- .../suites/0_stateless/13_tpch/13_0009_q9.sql | 2 +- .../0_stateless/13_tpch/13_0010_q10.sql | 2 +- .../0_stateless/13_tpch/13_0011_q11.sql | 2 +- .../0_stateless/13_tpch/13_0012_q12.sql | 2 +- .../0_stateless/13_tpch/13_0013_q13.sql | 2 +- .../0_stateless/13_tpch/13_0014_q14.sql | 2 +- .../0_stateless/13_tpch/13_0015_q15.sql | 2 +- .../0_stateless/13_tpch/13_0016_q16.sql | 2 +- .../0_stateless/13_tpch/13_0017_q17.sql | 2 +- .../0_stateless/13_tpch/13_0018_q18.sql | 2 +- .../0_stateless/13_tpch/13_0019_q19.sql | 3 +- .../0_stateless/13_tpch/13_0020_q20.sql | 2 +- .../0_stateless/13_tpch/13_0021_q21.sql | 2 +- .../0_stateless/13_tpch/13_0022_q22.sql | 2 +- .../20+_others/20_0009_format_diagnostic.sh | 2 +- .../00_copy/00_0000_copy_from_s3_location.sh | 2 +- .../00_0001_copy_from_http_location.sh | 2 +- .../01_load_v2/01_0000_streaming_load.sh | 27 ++- .../01_load_v2/01_0001_upload_to_stage.sh | 2 +- .../01_0002_remove_external_stage.sh | 2 +- .../01_0002_remove_internal_stage.sh | 2 +- .../01_load_v2/01_0003_sync_stage_file.sh | 2 +- .../01_0004_streaming_variant_load.sh | 2 +- .../04_mini_dataset/04_0000_mini_ontime.sh | 3 - .../04_mini_dataset/04_0001_mini_hits.sh | 4 - tests/suites/1_stateful/ddl/hits.sql | 212 ++++++++--------- tests/suites/1_stateful/ddl/ontime.sql | 218 +++++++++--------- tests/suites/1_stateful/ddl/variant_test.sql | 4 +- 109 files changed, 360 insertions(+), 443 deletions(-) diff --git a/common/formats/src/format_parquet.rs b/common/formats/src/format_parquet.rs index 3194cd0d8ef78..c6504b6c9152b 100644 --- a/common/formats/src/format_parquet.rs +++ b/common/formats/src/format_parquet.rs @@ -31,7 +31,6 @@ use common_datablocks::DataBlock; use common_datavalues::remove_nullable; use common_datavalues::DataSchema; use common_datavalues::DataSchemaRef; -use common_datavalues::DataTypeImpl; use common_exception::ErrorCode; use common_exception::Result; use common_io::prelude::FileSplit; @@ -133,39 +132,26 @@ impl InputFormat for ParquetInputFormat { let infer_schema = read::infer_schema(&parquet_metadata)?; let actually_schema = DataSchema::from(&infer_schema); - if actually_schema.num_fields() != self.schema.num_fields() { - return Err(ErrorCode::ParquetError(format!( - "schema field size mismatch, expected: {}, got: {} ", - 
actually_schema.num_fields(), - self.schema.num_fields() - ))); - } - - // we ignore the nullable sign to compare the schema - let fa: Vec<(&String, DataTypeImpl)> = self - .schema - .fields() - .iter() - .map(|f| (f.name(), remove_nullable(f.data_type()))) - .collect(); - - let fb: Vec<(&String, DataTypeImpl)> = actually_schema - .fields() - .iter() - .map(|f| (f.name(), remove_nullable(f.data_type()))) - .collect(); - - if fa != fb { - let diff = Diff::from_debug( - &self.schema, - &actually_schema, - "expected_schema", - "infer_schema", - ); - return Err(ErrorCode::ParquetError(format!( - "parquet schema mismatch, differ: {}", - diff - ))); + for f in self.schema.fields().iter() { + if let Some(m) = actually_schema + .fields() + .iter() + .filter(|c| c.name().eq_ignore_ascii_case(f.name())) + .last() + { + if remove_nullable(m.data_type()) != remove_nullable(f.data_type()) { + let diff = Diff::from_debug(f, m, "expected_field", "infer_field"); + return Err(ErrorCode::ParquetError(format!( + "parquet schema mismatch, differ: {}", + diff + ))); + } + } else { + return Err(ErrorCode::ParquetError(format!( + "schema field size mismatch, expected to find column: {}", + f.name() + ))); + } } let fields = &self.arrow_table_schema.fields; diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 83117257ca60f..2ad7f90ebb1e9 100644 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -56,7 +56,6 @@ def nonterminals(expansion): prepare_sqls = [ - "SET enable_planner_v2 = 1;", "drop table if exists t1;", "drop table if exists t2;", "drop table if exists t3;", diff --git a/tests/logictest/suites/base/02_function/02_0057_function_nullif b/tests/logictest/suites/base/02_function/02_0057_function_nullif index 9d2072c413b54..29e2771094f63 100644 --- a/tests/logictest/suites/base/02_function/02_0057_function_nullif +++ b/tests/logictest/suites/base/02_function/02_0057_function_nullif @@ -1,5 +1,4 @@ -statement ok -SET enable_planner_v2 = 1; + statement query I SELECT NULLIF(2, 1); diff --git a/tests/logictest/suites/base/02_function/02_0058_function_ifnull b/tests/logictest/suites/base/02_function/02_0058_function_ifnull index b225233505a48..6037539748027 100644 --- a/tests/logictest/suites/base/02_function/02_0058_function_ifnull +++ b/tests/logictest/suites/base/02_function/02_0058_function_ifnull @@ -1,5 +1,4 @@ -statement ok -SET enable_planner_v2 = 1; + statement query I SELECT IFNULL(1, 1); diff --git a/tests/logictest/suites/base/02_function/02_0059_function_is_distinct_from b/tests/logictest/suites/base/02_function/02_0059_function_is_distinct_from index 84375e4c57b65..64f2023939216 100644 --- a/tests/logictest/suites/base/02_function/02_0059_function_is_distinct_from +++ b/tests/logictest/suites/base/02_function/02_0059_function_is_distinct_from @@ -1,5 +1,4 @@ -statement ok -SET enable_planner_v2 = 1; + statement query B SELECT 1 IS DISTINCT FROM 2; diff --git a/tests/logictest/suites/base/03_dml/03_0014_insert_into_select_v2 b/tests/logictest/suites/base/03_dml/03_0014_insert_into_select_v2 index 284f08eff538b..b47ab7a477b28 100644 --- a/tests/logictest/suites/base/03_dml/03_0014_insert_into_select_v2 +++ b/tests/logictest/suites/base/03_dml/03_0014_insert_into_select_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db1; diff --git a/tests/logictest/suites/base/03_dml/03_0016_insert_into_values_v2 b/tests/logictest/suites/base/03_dml/03_0016_insert_into_values_v2 index a8febcbb90e76..0f82be6bb86fa 100644 --- 
a/tests/logictest/suites/base/03_dml/03_0016_insert_into_values_v2 +++ b/tests/logictest/suites/base/03_dml/03_0016_insert_into_values_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db1; diff --git a/tests/logictest/suites/base/03_dml/03_0017_insert_overwrite_v2 b/tests/logictest/suites/base/03_dml/03_0017_insert_overwrite_v2 index 9baf0bff581bc..d80a373dcb882 100644 --- a/tests/logictest/suites/base/03_dml/03_0017_insert_overwrite_v2 +++ b/tests/logictest/suites/base/03_dml/03_0017_insert_overwrite_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db1; diff --git a/tests/logictest/suites/base/03_dml/03_0018_insert_into_variant_v2 b/tests/logictest/suites/base/03_dml/03_0018_insert_into_variant_v2 index 8c984a68ec208..e03a30feb6150 100644 --- a/tests/logictest/suites/base/03_dml/03_0018_insert_into_variant_v2 +++ b/tests/logictest/suites/base/03_dml/03_0018_insert_into_variant_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db1; diff --git a/tests/logictest/suites/base/03_dml/03_0025_delete_from_v2 b/tests/logictest/suites/base/03_dml/03_0025_delete_from_v2 index ac12f9a20261c..9043e9c19f20e 100644 --- a/tests/logictest/suites/base/03_dml/03_0025_delete_from_v2 +++ b/tests/logictest/suites/base/03_dml/03_0025_delete_from_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db1; diff --git a/tests/logictest/suites/base/04_explain/04_0002_explain_v2 b/tests/logictest/suites/base/04_explain/04_0002_explain_v2 index 5b898a352408b..f9822ab58b796 100644 --- a/tests/logictest/suites/base/04_explain/04_0002_explain_v2 +++ b/tests/logictest/suites/base/04_explain/04_0002_explain_v2 @@ -1,8 +1,7 @@ -- TODO(need fix) onlyif mysql -statement ok -set enable_planner_v2 = 1; + onlyif mysql statement ok diff --git a/tests/logictest/suites/base/05_ddl/05_0000_ddl_create_tables_v2 b/tests/logictest/suites/base/05_ddl/05_0000_ddl_create_tables_v2 index aa8c456971a3e..f37da387f99cc 100644 --- a/tests/logictest/suites/base/05_ddl/05_0000_ddl_create_tables_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0000_ddl_create_tables_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP TABLE IF EXISTS t; diff --git a/tests/logictest/suites/base/05_ddl/05_0000_ddl_drop_tables_v2 b/tests/logictest/suites/base/05_ddl/05_0000_ddl_drop_tables_v2 index aa0ab666c1e5c..184d3251b1457 100644 --- a/tests/logictest/suites/base/05_ddl/05_0000_ddl_drop_tables_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0000_ddl_drop_tables_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP TABLE IF EXISTS t; diff --git a/tests/logictest/suites/base/05_ddl/05_0001_ddl_create_database_v2 b/tests/logictest/suites/base/05_ddl/05_0001_ddl_create_database_v2 index f75950a2916fe..a26d23140d6cd 100644 --- a/tests/logictest/suites/base/05_ddl/05_0001_ddl_create_database_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0001_ddl_create_database_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db; diff --git a/tests/logictest/suites/base/05_ddl/05_0001_ddl_drop_database_v2 b/tests/logictest/suites/base/05_ddl/05_0001_ddl_drop_database_v2 index 13f9e79be7666..cd12b2f7b3032 100644 --- a/tests/logictest/suites/base/05_ddl/05_0001_ddl_drop_database_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0001_ddl_drop_database_v2 @@ -1,5 +1,4 @@ -statement ok -set 
enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db; diff --git a/tests/logictest/suites/base/05_ddl/05_0001_ddl_drop_table_full_v2 b/tests/logictest/suites/base/05_ddl/05_0001_ddl_drop_table_full_v2 index 5f3c5aa0180a9..ea99416296f7e 100644 --- a/tests/logictest/suites/base/05_ddl/05_0001_ddl_drop_table_full_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0001_ddl_drop_table_full_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db_13_0001; diff --git a/tests/logictest/suites/base/05_ddl/05_0003_ddl_alter_database_v2 b/tests/logictest/suites/base/05_ddl/05_0003_ddl_alter_database_v2 index 13f78d6dea6b9..65d65ef935962 100644 --- a/tests/logictest/suites/base/05_ddl/05_0003_ddl_alter_database_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0003_ddl_alter_database_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS a; diff --git a/tests/logictest/suites/base/05_ddl/05_0003_ddl_alter_tabld_v2 b/tests/logictest/suites/base/05_ddl/05_0003_ddl_alter_tabld_v2 index 6559cda700bc6..d70b3b06bb697 100644 --- a/tests/logictest/suites/base/05_ddl/05_0003_ddl_alter_tabld_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0003_ddl_alter_tabld_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP TABLE IF EXISTS `05_0003_at_t0`; diff --git a/tests/logictest/suites/base/05_ddl/05_0003_ddl_rename_table_v2 b/tests/logictest/suites/base/05_ddl/05_0003_ddl_rename_table_v2 index 2e4dfa8ee2500..3fe4a22ab55b1 100644 --- a/tests/logictest/suites/base/05_ddl/05_0003_ddl_rename_table_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0003_ddl_rename_table_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP TABLE IF EXISTS t0; diff --git a/tests/logictest/suites/base/05_ddl/05_0003_ddl_truncate_table_v2 b/tests/logictest/suites/base/05_ddl/05_0003_ddl_truncate_table_v2 index edba98e9bf5af..64ea0d98baf79 100644 --- a/tests/logictest/suites/base/05_ddl/05_0003_ddl_truncate_table_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0003_ddl_truncate_table_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP TABLE IF EXISTS t1; diff --git a/tests/logictest/suites/base/05_ddl/05_0004_ddl_create_user_v2 b/tests/logictest/suites/base/05_ddl/05_0004_ddl_create_user_v2 index 883e0cf19f126..3749ca05e3b68 100644 --- a/tests/logictest/suites/base/05_ddl/05_0004_ddl_create_user_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0004_ddl_create_user_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP USER IF EXISTS 'test-a'@'localhost'; diff --git a/tests/logictest/suites/base/05_ddl/05_0006_ddl_grant_privilege_v2 b/tests/logictest/suites/base/05_ddl/05_0006_ddl_grant_privilege_v2 index b90251eda7468..7a634b8213b4e 100644 --- a/tests/logictest/suites/base/05_ddl/05_0006_ddl_grant_privilege_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0006_ddl_grant_privilege_v2 @@ -1,5 +1,4 @@ -statement ok -SET enable_planner_v2 = 1; + statement ok DROP USER IF EXISTS 'test-grant'@'localhost'; diff --git a/tests/logictest/suites/base/05_ddl/05_0014_ddl_create_role_v2 b/tests/logictest/suites/base/05_ddl/05_0014_ddl_create_role_v2 index 727f3a34322c8..a3c92c7cc0429 100644 --- a/tests/logictest/suites/base/05_ddl/05_0014_ddl_create_role_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0014_ddl_create_role_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok CREATE ROLE 'test-a'; diff --git 
a/tests/logictest/suites/base/05_ddl/05_0015_ddl_drop_role_v2 b/tests/logictest/suites/base/05_ddl/05_0015_ddl_drop_role_v2 index 8578432e8aadf..4998656d7e402 100644 --- a/tests/logictest/suites/base/05_ddl/05_0015_ddl_drop_role_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0015_ddl_drop_role_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement error 2204 DROP ROLE 'test-b'; diff --git a/tests/logictest/suites/base/05_ddl/05_0016_ddl_stage_v2 b/tests/logictest/suites/base/05_ddl/05_0016_ddl_stage_v2 index c83a20aad73cc..3f4aeb2d6270e 100644 --- a/tests/logictest/suites/base/05_ddl/05_0016_ddl_stage_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0016_ddl_stage_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok CREATE STAGE test_stage url='s3://load/files/' credentials=(aws_key_id='1a2b3c' aws_secret_key='4x5y6z'); diff --git a/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 b/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 index a88abc23d0c0c..5607cde9f6738 100644 --- a/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 @@ -1,5 +1,4 @@ -statement ok -SET enable_planner_v2 = 1; + statement ok DROP ROLE IF EXISTS 'test'; diff --git a/tests/logictest/suites/base/05_ddl/05_0019_ddl_create_view_v2 b/tests/logictest/suites/base/05_ddl/05_0019_ddl_create_view_v2 index 5117f3f6e42c5..798100a04279a 100644 --- a/tests/logictest/suites/base/05_ddl/05_0019_ddl_create_view_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0019_ddl_create_view_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP VIEW IF EXISTS tmp_view; diff --git a/tests/logictest/suites/base/05_ddl/05_0020_ddl_drop_view_v2 b/tests/logictest/suites/base/05_ddl/05_0020_ddl_drop_view_v2 index 86665bbeab40f..1c421bb00566a 100644 --- a/tests/logictest/suites/base/05_ddl/05_0020_ddl_drop_view_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0020_ddl_drop_view_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP VIEW IF EXISTS tmp_view; diff --git a/tests/logictest/suites/base/05_ddl/05_0021_ddl_alter_view_v2 b/tests/logictest/suites/base/05_ddl/05_0021_ddl_alter_view_v2 index 2580582abc505..4aa3cec5a9399 100644 --- a/tests/logictest/suites/base/05_ddl/05_0021_ddl_alter_view_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0021_ddl_alter_view_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP VIEW IF EXISTS tmp_view; diff --git a/tests/logictest/suites/base/06_show/06_0005_show_functions_v2 b/tests/logictest/suites/base/06_show/06_0005_show_functions_v2 index 24f1878f5c799..7965db00b18be 100644 --- a/tests/logictest/suites/base/06_show/06_0005_show_functions_v2 +++ b/tests/logictest/suites/base/06_show/06_0005_show_functions_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement query TBBTT SHOW FUNCTIONS LIKE 'today%'; diff --git a/tests/logictest/suites/base/06_show/06_0007_show_roles_v2 b/tests/logictest/suites/base/06_show/06_0007_show_roles_v2 index f7ca9cf8ec592..6ba13fadf6802 100644 --- a/tests/logictest/suites/base/06_show/06_0007_show_roles_v2 +++ b/tests/logictest/suites/base/06_show/06_0007_show_roles_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok CREATE ROLE 'test'; diff --git a/tests/logictest/suites/base/09_fuse_engine/09_0001_remote_insert_v2 b/tests/logictest/suites/base/09_fuse_engine/09_0001_remote_insert_v2 index 0f46c83a45728..e24875c4682b6 100644 --- 
a/tests/logictest/suites/base/09_fuse_engine/09_0001_remote_insert_v2 +++ b/tests/logictest/suites/base/09_fuse_engine/09_0001_remote_insert_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db1; diff --git a/tests/logictest/suites/base/09_fuse_engine/09_0004_remote_insert_into_select_v2 b/tests/logictest/suites/base/09_fuse_engine/09_0004_remote_insert_into_select_v2 index d2b9891bf3dcd..25752933c70ea 100644 --- a/tests/logictest/suites/base/09_fuse_engine/09_0004_remote_insert_into_select_v2 +++ b/tests/logictest/suites/base/09_fuse_engine/09_0004_remote_insert_into_select_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db_09_004; diff --git a/tests/logictest/suites/base/09_fuse_engine/09_0005_remote_insert_into_select_v2 b/tests/logictest/suites/base/09_fuse_engine/09_0005_remote_insert_into_select_v2 index c097f73b5cb24..f6a827326032a 100644 --- a/tests/logictest/suites/base/09_fuse_engine/09_0005_remote_insert_into_select_v2 +++ b/tests/logictest/suites/base/09_fuse_engine/09_0005_remote_insert_into_select_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db_09_0005; diff --git a/tests/logictest/suites/base/09_fuse_engine/09_0008_fuse_optimize_table b/tests/logictest/suites/base/09_fuse_engine/09_0008_fuse_optimize_table index 35c31b08ba924..493b3beb01c49 100644 --- a/tests/logictest/suites/base/09_fuse_engine/09_0008_fuse_optimize_table +++ b/tests/logictest/suites/base/09_fuse_engine/09_0008_fuse_optimize_table @@ -50,8 +50,7 @@ Projection: count():UInt64 Expression: 3:UInt64 (Exact Statistics) ReadDataSource: scan schema: [dummy:UInt8], statistics: [read_rows: 1, read_bytes: 1, partitions_scanned: 1, partitions_total: 1] -statement ok -set enable_planner_v2 = 1; + statement query B select count(*)=4 from fuse_snapshot('db_09_0008', 't'); diff --git a/tests/logictest/suites/base/09_fuse_engine/09_0010_remote_insert_overwrite_v2 b/tests/logictest/suites/base/09_fuse_engine/09_0010_remote_insert_overwrite_v2 index 76d6cb51a1a57..5e10aa703f1d8 100644 --- a/tests/logictest/suites/base/09_fuse_engine/09_0010_remote_insert_overwrite_v2 +++ b/tests/logictest/suites/base/09_fuse_engine/09_0010_remote_insert_overwrite_v2 @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP DATABASE IF EXISTS db1; diff --git a/tests/logictest/suites/base/09_fuse_engine/09_0018_min_max_index b/tests/logictest/suites/base/09_fuse_engine/09_0018_min_max_index index ad36188664a2d..bc3eede3ccf73 100644 --- a/tests/logictest/suites/base/09_fuse_engine/09_0018_min_max_index +++ b/tests/logictest/suites/base/09_fuse_engine/09_0018_min_max_index @@ -38,8 +38,7 @@ Projection: c1:Int32, c2:String Filter: (c2 > b) ReadDataSource: scan schema: [c1:Int32, c2:String], statistics: [read_rows: 1, read_bytes: 55, partitions_scanned: 1, partitions_total: 3], push_downs: [projections: [0, 1], filters: [(c2 > b)]] -statement ok -set enable_planner_v2 = 1; + statement ok DROP TABLE t; diff --git a/tests/logictest/suites/base/15_query/aggregate.test b/tests/logictest/suites/base/15_query/aggregate.test index 00b9483a69260..c2167f711a7b3 100644 --- a/tests/logictest/suites/base/15_query/aggregate.test +++ b/tests/logictest/suites/base/15_query/aggregate.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + -- # non-grouping column error statement error must appear in the GROUP BY clause or be used in an aggregate function diff --git 
a/tests/logictest/suites/base/15_query/alias/having_with_alias.test b/tests/logictest/suites/base/15_query/alias/having_with_alias.test index d124670a21aa9..0ee2f90da1db0 100644 --- a/tests/logictest/suites/base/15_query/alias/having_with_alias.test +++ b/tests/logictest/suites/base/15_query/alias/having_with_alias.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement query I select count(*) as count from (select * from numbers(1)) having count = 1; diff --git a/tests/logictest/suites/base/15_query/functions/arguments.test b/tests/logictest/suites/base/15_query/functions/arguments.test index 1b1d51c1b7f94..8a6da61245f3e 100644 --- a/tests/logictest/suites/base/15_query/functions/arguments.test +++ b/tests/logictest/suites/base/15_query/functions/arguments.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + # Number arguments not match statement error 1028 diff --git a/tests/logictest/suites/base/15_query/functions/cast.test b/tests/logictest/suites/base/15_query/functions/cast.test index ab05c92236faa..d639779a40f19 100644 --- a/tests/logictest/suites/base/15_query/functions/cast.test +++ b/tests/logictest/suites/base/15_query/functions/cast.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok drop table if exists cast_test; diff --git a/tests/logictest/suites/base/15_query/having.test b/tests/logictest/suites/base/15_query/having.test index 94a2a73036a4d..7dc958ea74316 100644 --- a/tests/logictest/suites/base/15_query/having.test +++ b/tests/logictest/suites/base/15_query/having.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement query I select * from numbers(10) having number = 1; diff --git a/tests/logictest/suites/base/15_query/order.test b/tests/logictest/suites/base/15_query/order.test index e7f6a71c450b0..92fffb321a568 100644 --- a/tests/logictest/suites/base/15_query/order.test +++ b/tests/logictest/suites/base/15_query/order.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok create table order_test(a int null); diff --git a/tests/logictest/suites/base/15_query/render_result.test b/tests/logictest/suites/base/15_query/render_result.test index b30288d2f0d3a..5745364b6e6d1 100644 --- a/tests/logictest/suites/base/15_query/render_result.test +++ b/tests/logictest/suites/base/15_query/render_result.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok drop table if exists t1 all; diff --git a/tests/logictest/suites/base/15_query/select.test b/tests/logictest/suites/base/15_query/select.test index 7dc37809708a5..58331fd0cf656 100644 --- a/tests/logictest/suites/base/15_query/select.test +++ b/tests/logictest/suites/base/15_query/select.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement query T select 'Hello, world!'; diff --git a/tests/logictest/suites/base/15_query/subquery.test b/tests/logictest/suites/base/15_query/subquery.test index 6c2a114f6e2cc..b9d732fc127e4 100644 --- a/tests/logictest/suites/base/15_query/subquery.test +++ b/tests/logictest/suites/base/15_query/subquery.test @@ -1,5 +1,4 @@ -statement ok -set enable_planner_v2 = 1; + statement ok DROP TABLE IF EXISTS c; diff --git a/tests/logictest/suites/base/20+_others/20_0001_planner_v2 b/tests/logictest/suites/base/20+_others/20_0001_planner_v2 index cbefc9e80947c..8d205f9a8a81f 100644 --- a/tests/logictest/suites/base/20+_others/20_0001_planner_v2 +++ b/tests/logictest/suites/base/20+_others/20_0001_planner_v2 @@ -1,6 +1,5 @@ -statement ok -set enable_planner_v2 = 1; 
+ statement query I @@ -1671,8 +1670,7 @@ statement ok insert into t3 values(1); -statement ok -set enable_planner_v2 = 1; + statement query I diff --git a/tests/logictest/suites/base/20+_others/20_0002_planner_v2_display_error b/tests/logictest/suites/base/20+_others/20_0002_planner_v2_display_error index 21ee4559608b8..8e2645691796d 100644 --- a/tests/logictest/suites/base/20+_others/20_0002_planner_v2_display_error +++ b/tests/logictest/suites/base/20+_others/20_0002_planner_v2_display_error @@ -1,6 +1,5 @@ -statement ok -set enable_planner_v2 = 1; + statement ok diff --git a/tests/logictest/suites/ydb/select1-1.test b/tests/logictest/suites/ydb/select1-1.test index f8468cdd64163..32149d1dd3464 100644 --- a/tests/logictest/suites/ydb/select1-1.test +++ b/tests/logictest/suites/ydb/select1-1.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer, b integer, c integer, d integer, e integer); diff --git a/tests/logictest/suites/ydb/select1-2.test b/tests/logictest/suites/ydb/select1-2.test index e814ed1de1ac7..7a8a998fff4fa 100644 --- a/tests/logictest/suites/ydb/select1-2.test +++ b/tests/logictest/suites/ydb/select1-2.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer, b integer, c integer, d integer, e integer); diff --git a/tests/logictest/suites/ydb/select1-3.test b/tests/logictest/suites/ydb/select1-3.test index a10ef57660008..42215c4d6861f 100644 --- a/tests/logictest/suites/ydb/select1-3.test +++ b/tests/logictest/suites/ydb/select1-3.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer, b integer, c integer, d integer, e integer); diff --git a/tests/logictest/suites/ydb/select1-4.test b/tests/logictest/suites/ydb/select1-4.test index 0c3487fbe05db..2baaecfd9113d 100644 --- a/tests/logictest/suites/ydb/select1-4.test +++ b/tests/logictest/suites/ydb/select1-4.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer, b integer, c integer, d integer, e integer); diff --git a/tests/logictest/suites/ydb/select1-5.test b/tests/logictest/suites/ydb/select1-5.test index ae6d6edb96b3d..0184953168e86 100644 --- a/tests/logictest/suites/ydb/select1-5.test +++ b/tests/logictest/suites/ydb/select1-5.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer, b integer, c integer, d integer, e integer); diff --git a/tests/logictest/suites/ydb/select2-1.test b/tests/logictest/suites/ydb/select2-1.test index 782785fa731b9..7cce01af79154 100644 --- a/tests/logictest/suites/ydb/select2-1.test +++ b/tests/logictest/suites/ydb/select2-1.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select2-2.test b/tests/logictest/suites/ydb/select2-2.test index 61854a9cdab05..47dba7267eeef 100644 --- a/tests/logictest/suites/ydb/select2-2.test +++ b/tests/logictest/suites/ydb/select2-2.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer 
null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select2-3.test b/tests/logictest/suites/ydb/select2-3.test index a324ad3afb5b3..4ca7c886ba107 100644 --- a/tests/logictest/suites/ydb/select2-3.test +++ b/tests/logictest/suites/ydb/select2-3.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select2-4.test b/tests/logictest/suites/ydb/select2-4.test index c1c385183b93b..961e7027c3366 100644 --- a/tests/logictest/suites/ydb/select2-4.test +++ b/tests/logictest/suites/ydb/select2-4.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select2-5.test b/tests/logictest/suites/ydb/select2-5.test index ccc5e01f347c3..b336a8b6acc60 100644 --- a/tests/logictest/suites/ydb/select2-5.test +++ b/tests/logictest/suites/ydb/select2-5.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-1.test b/tests/logictest/suites/ydb/select3-1.test index 2296e85b956f7..94db7e6044d1d 100644 --- a/tests/logictest/suites/ydb/select3-1.test +++ b/tests/logictest/suites/ydb/select3-1.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-10.test b/tests/logictest/suites/ydb/select3-10.test index 634626f5c0077..44a6e96356b48 100644 --- a/tests/logictest/suites/ydb/select3-10.test +++ b/tests/logictest/suites/ydb/select3-10.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-11.test b/tests/logictest/suites/ydb/select3-11.test index b6ac8039b283a..135bda0354f86 100644 --- a/tests/logictest/suites/ydb/select3-11.test +++ b/tests/logictest/suites/ydb/select3-11.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-12.test b/tests/logictest/suites/ydb/select3-12.test index dacd82079bbb4..28b8df7565b5b 100644 --- a/tests/logictest/suites/ydb/select3-12.test +++ b/tests/logictest/suites/ydb/select3-12.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-13.test b/tests/logictest/suites/ydb/select3-13.test index ade468d84d641..c8c4ff325f9e0 100644 --- a/tests/logictest/suites/ydb/select3-13.test +++ b/tests/logictest/suites/ydb/select3-13.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok 
create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-14.test b/tests/logictest/suites/ydb/select3-14.test index 80498916ed640..4b16ac596c39b 100644 --- a/tests/logictest/suites/ydb/select3-14.test +++ b/tests/logictest/suites/ydb/select3-14.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-15.test b/tests/logictest/suites/ydb/select3-15.test index 99f2cf7ddc848..c3480702a8089 100644 --- a/tests/logictest/suites/ydb/select3-15.test +++ b/tests/logictest/suites/ydb/select3-15.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-2.test b/tests/logictest/suites/ydb/select3-2.test index 616640e37f369..37f22fa3d648e 100644 --- a/tests/logictest/suites/ydb/select3-2.test +++ b/tests/logictest/suites/ydb/select3-2.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-3.test b/tests/logictest/suites/ydb/select3-3.test index 9721bc4f291fc..b9cd7105044ab 100644 --- a/tests/logictest/suites/ydb/select3-3.test +++ b/tests/logictest/suites/ydb/select3-3.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-4.test b/tests/logictest/suites/ydb/select3-4.test index 74c67eaf31189..e79a40a2f5e4f 100644 --- a/tests/logictest/suites/ydb/select3-4.test +++ b/tests/logictest/suites/ydb/select3-4.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-5.test b/tests/logictest/suites/ydb/select3-5.test index 0a48c6925edb7..953f7cbcaa01e 100644 --- a/tests/logictest/suites/ydb/select3-5.test +++ b/tests/logictest/suites/ydb/select3-5.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-6.test b/tests/logictest/suites/ydb/select3-6.test index 2366ffcc01441..ab1bdd1b54677 100644 --- a/tests/logictest/suites/ydb/select3-6.test +++ b/tests/logictest/suites/ydb/select3-6.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-7.test b/tests/logictest/suites/ydb/select3-7.test index 9cedd523cbe01..06bc679257925 100644 --- a/tests/logictest/suites/ydb/select3-7.test +++ b/tests/logictest/suites/ydb/select3-7.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement 
ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-8.test b/tests/logictest/suites/ydb/select3-8.test index fc42d21ae197b..1f90abb5b1d95 100644 --- a/tests/logictest/suites/ydb/select3-8.test +++ b/tests/logictest/suites/ydb/select3-8.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/logictest/suites/ydb/select3-9.test b/tests/logictest/suites/ydb/select3-9.test index ea58be4b903d7..9ae431e905ef4 100644 --- a/tests/logictest/suites/ydb/select3-9.test +++ b/tests/logictest/suites/ydb/select3-9.test @@ -1,8 +1,7 @@ statement ok drop table if exists t1 all; -statement ok -set enable_planner_v2 = 1; + statement ok create table t1(a integer null, b integer null, c integer null, d integer null, e integer null); diff --git a/tests/suites/0_stateless/04_explain/04_0002_explain_v2.sql b/tests/suites/0_stateless/04_explain/04_0002_explain_v2.sql index 2f594ca5965e8..87b8c758f5469 100644 --- a/tests/suites/0_stateless/04_explain/04_0002_explain_v2.sql +++ b/tests/suites/0_stateless/04_explain/04_0002_explain_v2.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select '===Explain==='; drop table if exists t1; drop table if exists t2; diff --git a/tests/suites/0_stateless/12_time_travel/12_0004_time_travel_select_at.sh b/tests/suites/0_stateless/12_time_travel/12_0004_time_travel_select_at.sh index ae17b38a31266..580a1e1d3c990 100755 --- a/tests/suites/0_stateless/12_time_travel/12_0004_time_travel_select_at.sh +++ b/tests/suites/0_stateless/12_time_travel/12_0004_time_travel_select_at.sh @@ -20,15 +20,14 @@ echo "counting the data set of first insertion, which should contain 2 rows" echo "select count(*) from t12_0004 at (snapshot => '$SNAPSHOT_ID')" | $MYSQL_CLIENT_CONNECT echo "planner_v2: counting the data set of first insertion, which should contain 2 rows" -echo "set enable_planner_v2 = 1;select count(t.c) from t12_0004 at (snapshot => '$SNAPSHOT_ID') as t" | $MYSQL_CLIENT_CONNECT +echo "select count(t.c) from t12_0004 at (snapshot => '$SNAPSHOT_ID') as t" | $MYSQL_CLIENT_CONNECT # Get a time point at/after the first insertion. TIMEPOINT=$(echo "select timestamp from fuse_snapshot('default', 't12_0004') where row_count=2" | $MYSQL_CLIENT_CONNECT) echo "planner_v2: counting the data set of first insertion by timestamp, which should contains 2 rows" -#echo "set enable_planner_v2 = 1;select count(t.c) from t12_0004 at (TIMESTAMP => $TIMEPOINT::TIMESTAMP) as t" | $MYSQL_CLIENT_CONNECT -echo "set enable_planner_v2 = 1;select count(t.c) from t12_0004 at (TIMESTAMP => '$TIMEPOINT'::TIMESTAMP) as t" | $MYSQL_CLIENT_CONNECT +echo "select count(t.c) from t12_0004 at (TIMESTAMP => '$TIMEPOINT'::TIMESTAMP) as t" | $MYSQL_CLIENT_CONNECT ## Drop table. 
echo "drop table t12_0004" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/0_stateless/13_tpch/13_0001_q1.sql b/tests/suites/0_stateless/13_tpch/13_0001_q1.sql index 32e79fa307464..f60d0b8accbe5 100644 --- a/tests/suites/0_stateless/13_tpch/13_0001_q1.sql +++ b/tests/suites/0_stateless/13_tpch/13_0001_q1.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select l_returnflag, l_linestatus, diff --git a/tests/suites/0_stateless/13_tpch/13_0002_q2.sql b/tests/suites/0_stateless/13_tpch/13_0002_q2.sql index 34a1dbf051d67..02c462c96818d 100644 --- a/tests/suites/0_stateless/13_tpch/13_0002_q2.sql +++ b/tests/suites/0_stateless/13_tpch/13_0002_q2.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select s_acctbal, s_name, diff --git a/tests/suites/0_stateless/13_tpch/13_0003_q3.sql b/tests/suites/0_stateless/13_tpch/13_0003_q3.sql index 559fdfd7e0a26..d2014965df588 100644 --- a/tests/suites/0_stateless/13_tpch/13_0003_q3.sql +++ b/tests/suites/0_stateless/13_tpch/13_0003_q3.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select l_orderkey, sum(l_extendedprice * (1 - l_discount)) as revenue, diff --git a/tests/suites/0_stateless/13_tpch/13_0004_q4.sql b/tests/suites/0_stateless/13_tpch/13_0004_q4.sql index f7e75fc9e3a56..6d0ece2dc3703 100644 --- a/tests/suites/0_stateless/13_tpch/13_0004_q4.sql +++ b/tests/suites/0_stateless/13_tpch/13_0004_q4.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select o_orderpriority, count(*) as order_count diff --git a/tests/suites/0_stateless/13_tpch/13_0005_q5.sql b/tests/suites/0_stateless/13_tpch/13_0005_q5.sql index e5359cf8cba7e..4ebc19bee778c 100644 --- a/tests/suites/0_stateless/13_tpch/13_0005_q5.sql +++ b/tests/suites/0_stateless/13_tpch/13_0005_q5.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select n_name, sum(l_extendedprice * (1 - l_discount)) as revenue diff --git a/tests/suites/0_stateless/13_tpch/13_0007_q7.sql b/tests/suites/0_stateless/13_tpch/13_0007_q7.sql index 1958df0cd2d61..ab079f34d39db 100644 --- a/tests/suites/0_stateless/13_tpch/13_0007_q7.sql +++ b/tests/suites/0_stateless/13_tpch/13_0007_q7.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select supp_nation, cust_nation, diff --git a/tests/suites/0_stateless/13_tpch/13_0009_q9.sql b/tests/suites/0_stateless/13_tpch/13_0009_q9.sql index bdae1748f52a8..e1d0486fd607f 100644 --- a/tests/suites/0_stateless/13_tpch/13_0009_q9.sql +++ b/tests/suites/0_stateless/13_tpch/13_0009_q9.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select nation, o_year, diff --git a/tests/suites/0_stateless/13_tpch/13_0010_q10.sql b/tests/suites/0_stateless/13_tpch/13_0010_q10.sql index df48f38a056dc..2e4121e29c9a8 100644 --- a/tests/suites/0_stateless/13_tpch/13_0010_q10.sql +++ b/tests/suites/0_stateless/13_tpch/13_0010_q10.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select c_custkey, c_name, diff --git a/tests/suites/0_stateless/13_tpch/13_0011_q11.sql b/tests/suites/0_stateless/13_tpch/13_0011_q11.sql index 7b0b0c2571857..f43881b146e82 100644 --- a/tests/suites/0_stateless/13_tpch/13_0011_q11.sql +++ b/tests/suites/0_stateless/13_tpch/13_0011_q11.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select ps_partkey, sum(ps_supplycost * ps_availqty) as value diff --git a/tests/suites/0_stateless/13_tpch/13_0012_q12.sql b/tests/suites/0_stateless/13_tpch/13_0012_q12.sql index f87b52baf3232..77fb63a8a7f1a 100644 --- a/tests/suites/0_stateless/13_tpch/13_0012_q12.sql +++ b/tests/suites/0_stateless/13_tpch/13_0012_q12.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select l_shipmode, sum(case diff 
--git a/tests/suites/0_stateless/13_tpch/13_0013_q13.sql b/tests/suites/0_stateless/13_tpch/13_0013_q13.sql index 1732fe15b05bd..2e24ef41740cf 100644 --- a/tests/suites/0_stateless/13_tpch/13_0013_q13.sql +++ b/tests/suites/0_stateless/13_tpch/13_0013_q13.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select c_count, count(*) as custdist diff --git a/tests/suites/0_stateless/13_tpch/13_0014_q14.sql b/tests/suites/0_stateless/13_tpch/13_0014_q14.sql index 6c45a65258aab..1291d008e57aa 100644 --- a/tests/suites/0_stateless/13_tpch/13_0014_q14.sql +++ b/tests/suites/0_stateless/13_tpch/13_0014_q14.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select TRUNCATE(100.00 * sum(case when p_type like 'PROMO%' diff --git a/tests/suites/0_stateless/13_tpch/13_0015_q15.sql b/tests/suites/0_stateless/13_tpch/13_0015_q15.sql index 265ca6800af9a..bdfa66d782b9b 100644 --- a/tests/suites/0_stateless/13_tpch/13_0015_q15.sql +++ b/tests/suites/0_stateless/13_tpch/13_0015_q15.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select supp_nation, cust_nation, diff --git a/tests/suites/0_stateless/13_tpch/13_0016_q16.sql b/tests/suites/0_stateless/13_tpch/13_0016_q16.sql index 845f705657d1f..8e29065fb6d85 100644 --- a/tests/suites/0_stateless/13_tpch/13_0016_q16.sql +++ b/tests/suites/0_stateless/13_tpch/13_0016_q16.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select p_brand, p_type, diff --git a/tests/suites/0_stateless/13_tpch/13_0017_q17.sql b/tests/suites/0_stateless/13_tpch/13_0017_q17.sql index 370accbbd7818..21fdca88efeff 100644 --- a/tests/suites/0_stateless/13_tpch/13_0017_q17.sql +++ b/tests/suites/0_stateless/13_tpch/13_0017_q17.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select sum(l_extendedprice) / 7.0 as avg_yearly from diff --git a/tests/suites/0_stateless/13_tpch/13_0018_q18.sql b/tests/suites/0_stateless/13_tpch/13_0018_q18.sql index 63649fa088ec8..c235f435620e1 100644 --- a/tests/suites/0_stateless/13_tpch/13_0018_q18.sql +++ b/tests/suites/0_stateless/13_tpch/13_0018_q18.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select c_name, c_custkey, diff --git a/tests/suites/0_stateless/13_tpch/13_0019_q19.sql b/tests/suites/0_stateless/13_tpch/13_0019_q19.sql index b88036652b5f3..906ae8ad5f4aa 100644 --- a/tests/suites/0_stateless/13_tpch/13_0019_q19.sql +++ b/tests/suites/0_stateless/13_tpch/13_0019_q19.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select sum(l_extendedprice* (1 - l_discount)) as revenue from @@ -67,4 +67,3 @@ where ) and l_shipinstruct = 'DELIVER IN PERSON' ) -; \ No newline at end of file diff --git a/tests/suites/0_stateless/13_tpch/13_0020_q20.sql b/tests/suites/0_stateless/13_tpch/13_0020_q20.sql index 6adc0df6f3be9..473548eef54e9 100644 --- a/tests/suites/0_stateless/13_tpch/13_0020_q20.sql +++ b/tests/suites/0_stateless/13_tpch/13_0020_q20.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select s_name, s_address diff --git a/tests/suites/0_stateless/13_tpch/13_0021_q21.sql b/tests/suites/0_stateless/13_tpch/13_0021_q21.sql index b33768fdcfbf7..0874e5084f999 100644 --- a/tests/suites/0_stateless/13_tpch/13_0021_q21.sql +++ b/tests/suites/0_stateless/13_tpch/13_0021_q21.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 1; + select s_name, count(*) as numwait diff --git a/tests/suites/0_stateless/13_tpch/13_0022_q22.sql b/tests/suites/0_stateless/13_tpch/13_0022_q22.sql index ab1b13b8a9996..af1ff601910cb 100644 --- a/tests/suites/0_stateless/13_tpch/13_0022_q22.sql +++ b/tests/suites/0_stateless/13_tpch/13_0022_q22.sql @@ -1,4 +1,4 @@ -set enable_planner_v2 = 
1; + select cntrycode, count(*) as numcust, diff --git a/tests/suites/0_stateless/20+_others/20_0009_format_diagnostic.sh b/tests/suites/0_stateless/20+_others/20_0009_format_diagnostic.sh index 3ea6cac7c983b..8354343716d52 100755 --- a/tests/suites/0_stateless/20+_others/20_0009_format_diagnostic.sh +++ b/tests/suites/0_stateless/20+_others/20_0009_format_diagnostic.sh @@ -15,7 +15,7 @@ cat << EOF > /tmp/databend_test_csv2.txt EOF -echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT + echo "create table a ( a datetime, b string, c int);" | $MYSQL_CLIENT_CONNECT curl -sH "insert_sql:insert into a format Csv" -H "skip_header:0" -F "upload=@/tmp/databend_test_csv1.txt" -F "upload=@/tmp/databend_test_csv2.txt" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" | grep "Error" diff --git a/tests/suites/1_stateful/00_copy/00_0000_copy_from_s3_location.sh b/tests/suites/1_stateful/00_copy/00_0000_copy_from_s3_location.sh index 673a250f6669f..7c0495756a99f 100755 --- a/tests/suites/1_stateful/00_copy/00_0000_copy_from_s3_location.sh +++ b/tests/suites/1_stateful/00_copy/00_0000_copy_from_s3_location.sh @@ -29,7 +29,7 @@ copy_from_location_cases=( for i in "${copy_from_location_cases[@]}"; do echo "$i" | $MYSQL_CLIENT_CONNECT - echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime200" | $MYSQL_CLIENT_CONNECT + echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT done diff --git a/tests/suites/1_stateful/00_copy/00_0001_copy_from_http_location.sh b/tests/suites/1_stateful/00_copy/00_0001_copy_from_http_location.sh index bcddef3692830..3e4299e7eb6ed 100755 --- a/tests/suites/1_stateful/00_copy/00_0001_copy_from_http_location.sh +++ b/tests/suites/1_stateful/00_copy/00_0001_copy_from_http_location.sh @@ -22,7 +22,7 @@ copy_from_location_cases=( for i in "${copy_from_location_cases[@]}"; do echo "$i" | $MYSQL_CLIENT_CONNECT - echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime200" | $MYSQL_CLIENT_CONNECT + echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT done diff --git a/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh index a851800f871c2..0d98e56f48b7a 100755 --- a/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh +++ b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../../../shell_env.sh -echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT + echo "drop table if exists ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT ## create ontime table cat $CURDIR/../ddl/ontime.sql | sed 's/ontime/ontime_streaming_load/g' | $MYSQL_CLIENT_CONNECT @@ -38,32 +38,32 @@ fi # load csv curl -H "insert_sql:insert into ontime_streaming_load format Csv" -H "skip_header:1" -F "upload=@/tmp/ontime_200.csv" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 -echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT # load csv gz curl -H "insert_sql:insert into ontime_streaming_load format Csv" -H "skip_header:1" -H "compression:gzip" -F "upload=@/tmp/ontime_200.csv.gz" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 -echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT # load csv zstd curl -H "insert_sql:insert into ontime_streaming_load format Csv" -H "skip_header:1" -H "compression:zstd" -F "upload=@/tmp/ontime_200.csv.zst" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 -echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT # load csv bz2 curl -H "insert_sql:insert into ontime_streaming_load format Csv" -H "skip_header:1" -H "compression:bz2" -F "upload=@/tmp/ontime_200.csv.bz2" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 -echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT # load parquet curl -H "insert_sql:insert into ontime_streaming_load format Parquet" -H "skip_header:1" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 -echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT # load ndjson curl -H "insert_sql:insert into ontime_streaming_load format NdJson" -H "skip_header:1" -F "upload=@/tmp/ontime_200.ndjson" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 -echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT @@ -71,6 +71,19 @@ echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT cat 
$CURDIR/../ddl/ontime.sql | sed 's/ontime/ontime_test1/g' | sed 's/DATE/VARCHAR/g' | $MYSQL_CLIENT_CONNECT curl -s -H "insert_sql:insert into ontime_test1 format Parquet" -H "skip_header:1" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" | grep -c 'Code: 1024' +# load parquet with less schema +echo 'CREATE TABLE ontime_test2 +( + Year SMALLINT UNSIGNED, + Quarter TINYINT UNSIGNED, + Month TINYINT UNSIGNED, + DayofMonth TINYINT UNSIGNED, + DayOfWeek TINYINT UNSIGNED +)' | $MYSQL_CLIENT_CONNECT + +curl -s -H "insert_sql:insert into ontime_test2 format Parquet" -H "skip_header:1" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" | grep -c 'Code: 1024' +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_test2;" | $MYSQL_CLIENT_CONNECT + echo "drop table ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT echo "drop table ontime_test1;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.sh b/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.sh index cb518258d8293..d4b1a5fdcefa7 100755 --- a/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.sh +++ b/tests/suites/1_stateful/01_load_v2/01_0001_upload_to_stage.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../../../shell_env.sh -echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT + echo "drop stage if exists s2;" | $MYSQL_CLIENT_CONNECT echo "CREATE STAGE if not exists s2;" | $MYSQL_CLIENT_CONNECT echo "list @s2" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.sh b/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.sh index 99779187ffaf7..59de519f6332b 100755 --- a/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.sh +++ b/tests/suites/1_stateful/01_load_v2/01_0002_remove_external_stage.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../../../shell_env.sh -echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT + echo "drop stage if exists named_external_stage" | $MYSQL_CLIENT_CONNECT ## tempdate/ diff --git a/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.sh b/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.sh index bbbdd63d51b15..e12f53725ba4e 100755 --- a/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.sh +++ b/tests/suites/1_stateful/01_load_v2/01_0002_remove_internal_stage.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../../../shell_env.sh -echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT + echo "drop stage if exists s1" | $MYSQL_CLIENT_CONNECT ## Copy from internal stage diff --git a/tests/suites/1_stateful/01_load_v2/01_0003_sync_stage_file.sh b/tests/suites/1_stateful/01_load_v2/01_0003_sync_stage_file.sh index 969dfef2b6581..0d98c1ac12358 100755 --- a/tests/suites/1_stateful/01_load_v2/01_0003_sync_stage_file.sh +++ b/tests/suites/1_stateful/01_load_v2/01_0003_sync_stage_file.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../../../shell_env.sh -echo "set enable_planner_v2 = 1;" | $MYSQL_CLIENT_CONNECT + echo "drop stage if exists test_sync" | $MYSQL_CLIENT_CONNECT echo "CREATE STAGE test_sync;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/01_load_v2/01_0004_streaming_variant_load.sh b/tests/suites/1_stateful/01_load_v2/01_0004_streaming_variant_load.sh index 5f788387c83ac..9978d578a89f3 100755 --- a/tests/suites/1_stateful/01_load_v2/01_0004_streaming_variant_load.sh +++ b/tests/suites/1_stateful/01_load_v2/01_0004_streaming_variant_load.sh @@ -26,7 +26,7 @@ fi # load csv curl -H "insert_sql:insert into variant_test_streaming_load format Csv" -H "skip_header:0" -H 'field_delimiter: ,' -H 'record_delimiter: \n' -F "upload=@/tmp/json_sample1.csv" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 curl -H "insert_sql:insert into variant_test_streaming_load format Csv" -H "skip_header:0" -H 'field_delimiter: |' -H 'record_delimiter: \n' -F "upload=@/tmp/json_sample2.csv" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 -echo "select * from variant_test_streaming_load order by \"Id\" asc;" | $MYSQL_CLIENT_CONNECT +echo "select * from variant_test_streaming_load order by Id asc;" | $MYSQL_CLIENT_CONNECT echo "truncate table variant_test_streaming_load" | $MYSQL_CLIENT_CONNECT echo "drop table variant_test_streaming_load;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh b/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh index 81dee44bec3e1..72ec999ba1e39 100755 --- a/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh +++ b/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh @@ -23,9 +23,6 @@ ontime_statements=( "SELECT OriginCityName, DestCityName, count(*) AS c FROM ontime_mini GROUP BY OriginCityName, DestCityName ORDER BY c DESC LIMIT 10;" ) -for i in "${ontime_statements[@]}"; do - echo "set enable_planner_v2 = 1; set unquoted_ident_case_sensitive = 1; $i" | $MYSQL_CLIENT_CONNECT -done ## Clean table echo "drop table if exists ontime_mini all;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/04_mini_dataset/04_0001_mini_hits.sh b/tests/suites/1_stateful/04_mini_dataset/04_0001_mini_hits.sh index d4cd26b99a199..016e1c1fee270 100755 --- a/tests/suites/1_stateful/04_mini_dataset/04_0001_mini_hits.sh +++ b/tests/suites/1_stateful/04_mini_dataset/04_0001_mini_hits.sh @@ -99,9 +99,5 @@ hits_statements=( "SELECT DATE_TRUNC(minute, EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY M ORDER BY M LIMIT 10 OFFSET 1000;" ) -for i in "${hits_statements[@]}"; do - echo "set enable_planner_v2 = 1; set unquoted_ident_case_sensitive = 1; $i" | $MYSQL_CLIENT_CONNECT -done - ## Clean up echo "drop table if exists hits all;" | $MYSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/ddl/hits.sql b/tests/suites/1_stateful/ddl/hits.sql index 4bead5f1c9fc3..8e3fc09cf22f8 100644 --- a/tests/suites/1_stateful/ddl/hits.sql +++ b/tests/suites/1_stateful/ddl/hits.sql @@ -1,109 +1,109 @@ CREATE TABLE hits ( - "WatchID" BIGINT NOT NULL, - "JavaEnable" SMALLINT NOT NULL, - "Title" TEXT NOT NULL, - "GoodEvent" SMALLINT NOT NULL, - "EventTime" TIMESTAMP NOT NULL, - "EventDate" Date NOT NULL, - "CounterID" INTEGER NOT NULL, - "ClientIP" INTEGER NOT NULL, - "RegionID" INTEGER NOT NULL, - "UserID" 
BIGINT NOT NULL, - "CounterClass" SMALLINT NOT NULL, - "OS" SMALLINT NOT NULL, - "UserAgent" SMALLINT NOT NULL, - "URL" TEXT NOT NULL, - "Referer" TEXT NOT NULL, - "IsRefresh" SMALLINT NOT NULL, - "RefererCategoryID" SMALLINT NOT NULL, - "RefererRegionID" INTEGER NOT NULL, - "URLCategoryID" SMALLINT NOT NULL, - "URLRegionID" INTEGER NOT NULL, - "ResolutionWidth" SMALLINT NOT NULL, - "ResolutionHeight" SMALLINT NOT NULL, - "ResolutionDepth" SMALLINT NOT NULL, - "FlashMajor" SMALLINT NOT NULL, - "FlashMinor" SMALLINT NOT NULL, - "FlashMinor2" TEXT NOT NULL, - "NetMajor" SMALLINT NOT NULL, - "NetMinor" SMALLINT NOT NULL, - "UserAgentMajor" SMALLINT NOT NULL, - "UserAgentMinor" VARCHAR(255) NOT NULL, - "CookieEnable" SMALLINT NOT NULL, - "JavascriptEnable" SMALLINT NOT NULL, - "IsMobile" SMALLINT NOT NULL, - "MobilePhone" SMALLINT NOT NULL, - "MobilePhoneModel" TEXT NOT NULL, - "Params" TEXT NOT NULL, - "IPNetworkID" INTEGER NOT NULL, - "TraficSourceID" SMALLINT NOT NULL, - "SearchEngineID" SMALLINT NOT NULL, - "SearchPhrase" TEXT NOT NULL, - "AdvEngineID" SMALLINT NOT NULL, - "IsArtifical" SMALLINT NOT NULL, - "WindowClientWidth" SMALLINT NOT NULL, - "WindowClientHeight" SMALLINT NOT NULL, - "ClientTimeZone" SMALLINT NOT NULL, - "ClientEventTime" TIMESTAMP NOT NULL, - "SilverlightVersion1" SMALLINT NOT NULL, - "SilverlightVersion2" SMALLINT NOT NULL, - "SilverlightVersion3" INTEGER NOT NULL, - "SilverlightVersion4" SMALLINT NOT NULL, - "PageCharset" TEXT NOT NULL, - "CodeVersion" INTEGER NOT NULL, - "IsLink" SMALLINT NOT NULL, - "IsDownload" SMALLINT NOT NULL, - "IsNotBounce" SMALLINT NOT NULL, - "FUniqID" BIGINT NOT NULL, - "OriginalURL" TEXT NOT NULL, - "HID" INTEGER NOT NULL, - "IsOldCounter" SMALLINT NOT NULL, - "IsEvent" SMALLINT NOT NULL, - "IsParameter" SMALLINT NOT NULL, - "DontCountHits" SMALLINT NOT NULL, - "WithHash" SMALLINT NOT NULL, - "HitColor" CHAR NOT NULL, - "LocalEventTime" TIMESTAMP NOT NULL, - "Age" SMALLINT NOT NULL, - "Sex" SMALLINT NOT NULL, - "Income" SMALLINT NOT NULL, - "Interests" SMALLINT NOT NULL, - "Robotness" SMALLINT NOT NULL, - "RemoteIP" INTEGER NOT NULL, - "WindowName" INTEGER NOT NULL, - "OpenerName" INTEGER NOT NULL, - "HistoryLength" SMALLINT NOT NULL, - "BrowserLanguage" TEXT NOT NULL, - "BrowserCountry" TEXT NOT NULL, - "SocialNetwork" TEXT NOT NULL, - "SocialAction" TEXT NOT NULL, - "HTTPError" SMALLINT NOT NULL, - "SendTiming" INTEGER NOT NULL, - "DNSTiming" INTEGER NOT NULL, - "ConnectTiming" INTEGER NOT NULL, - "ResponseStartTiming" INTEGER NOT NULL, - "ResponseEndTiming" INTEGER NOT NULL, - "FetchTiming" INTEGER NOT NULL, - "SocialSourceNetworkID" SMALLINT NOT NULL, - "SocialSourcePage" TEXT NOT NULL, - "ParamPrice" BIGINT NOT NULL, - "ParamOrderID" TEXT NOT NULL, - "ParamCurrency" TEXT NOT NULL, - "ParamCurrencyID" SMALLINT NOT NULL, - "OpenstatServiceName" TEXT NOT NULL, - "OpenstatCampaignID" TEXT NOT NULL, - "OpenstatAdID" TEXT NOT NULL, - "OpenstatSourceID" TEXT NOT NULL, - "UTMSource" TEXT NOT NULL, - "UTMMedium" TEXT NOT NULL, - "UTMCampaign" TEXT NOT NULL, - "UTMContent" TEXT NOT NULL, - "UTMTerm" TEXT NOT NULL, - "FromTag" TEXT NOT NULL, - "HasGCLID" SMALLINT NOT NULL, - "RefererHash" BIGINT NOT NULL, - "URLHash" BIGINT NOT NULL, - "CLID" INTEGER NOT NULL + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + 
CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL ) -CLUSTER BY ("CounterID", "EventDate", "UserID", "EventTime", "WatchID"); \ No newline at end of file +CLUSTER BY (CounterID, EventDate, UserID, EventTime, WatchID); \ No newline at end of file diff --git a/tests/suites/1_stateful/ddl/ontime.sql b/tests/suites/1_stateful/ddl/ontime.sql index 075371d56d4e6..fe849a131efb0 100644 --- a/tests/suites/1_stateful/ddl/ontime.sql +++ b/tests/suites/1_stateful/ddl/ontime.sql @@ -1,112 +1,112 @@ CREATE TABLE ontime ( - "Year" SMALLINT UNSIGNED, - 
"Quarter" TINYINT UNSIGNED, - "Month" TINYINT UNSIGNED, - "DayofMonth" TINYINT UNSIGNED, - "DayOfWeek" TINYINT UNSIGNED, - "FlightDate" DATE, - "Reporting_Airline" VARCHAR, - "DOT_ID_Reporting_Airline" INT, - "IATA_CODE_Reporting_Airline" VARCHAR, - "Tail_Number" VARCHAR, - "Flight_Number_Reporting_Airline" VARCHAR, - "OriginAirportID" INT, - "OriginAirportSeqID" INT, - "OriginCityMarketID" INT, - "Origin" VARCHAR, - "OriginCityName" VARCHAR, - "OriginState" VARCHAR, - "OriginStateFips" VARCHAR, - "OriginStateName" VARCHAR, - "OriginWac" INT, - "DestAirportID" INT, - "DestAirportSeqID" INT, - "DestCityMarketID" INT, - "Dest" VARCHAR, - "DestCityName" VARCHAR, - "DestState" VARCHAR, - "DestStateFips" VARCHAR, - "DestStateName" VARCHAR, - "DestWac" INT, - "CRSDepTime" INT, - "DepTime" INT, - "DepDelay" INT, - "DepDelayMinutes" INT, - "DepDel15" INT, - "DepartureDelayGroups" VARCHAR, - "DepTimeBlk" VARCHAR, - "TaxiOut" INT, - "WheelsOff" INT, - "WheelsOn" INT, - "TaxiIn" INT, - "CRSArrTime" INT, - "ArrTime" INT, - "ArrDelay" INT, - "ArrDelayMinutes" INT, - "ArrDel15" INT, - "ArrivalDelayGroups" INT, - "ArrTimeBlk" VARCHAR, - "Cancelled" TINYINT UNSIGNED, - "CancellationCode" VARCHAR, - "Diverted" TINYINT UNSIGNED, - "CRSElapsedTime" INT, - "ActualElapsedTime" INT, - "AirTime" INT, - "Flights" INT, - "Distance" INT, - "DistanceGroup" TINYINT UNSIGNED, - "CarrierDelay" INT, - "WeatherDelay" INT, - "NASDelay" INT, - "SecurityDelay" INT, - "LateAircraftDelay" INT, - "FirstDepTime" VARCHAR, - "TotalAddGTime" VARCHAR, - "LongestAddGTime" VARCHAR, - "DivAirportLandings" VARCHAR, - "DivReachedDest" VARCHAR, - "DivActualElapsedTime" VARCHAR, - "DivArrDelay" VARCHAR, - "DivDistance" VARCHAR, - "Div1Airport" VARCHAR, - "Div1AirportID" INT, - "Div1AirportSeqID" INT, - "Div1WheelsOn" VARCHAR, - "Div1TotalGTime" VARCHAR, - "Div1LongestGTime" VARCHAR, - "Div1WheelsOff" VARCHAR, - "Div1TailNum" VARCHAR, - "Div2Airport" VARCHAR, - "Div2AirportID" INT, - "Div2AirportSeqID" INT, - "Div2WheelsOn" VARCHAR, - "Div2TotalGTime" VARCHAR, - "Div2LongestGTime" VARCHAR, - "Div2WheelsOff" VARCHAR, - "Div2TailNum" VARCHAR, - "Div3Airport" VARCHAR, - "Div3AirportID" INT, - "Div3AirportSeqID" INT, - "Div3WheelsOn" VARCHAR, - "Div3TotalGTime" VARCHAR, - "Div3LongestGTime" VARCHAR, - "Div3WheelsOff" VARCHAR, - "Div3TailNum" VARCHAR, - "Div4Airport" VARCHAR, - "Div4AirportID" INT, - "Div4AirportSeqID" INT, - "Div4WheelsOn" VARCHAR, - "Div4TotalGTime" VARCHAR, - "Div4LongestGTime" VARCHAR, - "Div4WheelsOff" VARCHAR, - "Div4TailNum" VARCHAR, - "Div5Airport" VARCHAR, - "Div5AirportID" INT, - "Div5AirportSeqID" INT, - "Div5WheelsOn" VARCHAR, - "Div5TotalGTime" VARCHAR, - "Div5LongestGTime" VARCHAR, - "Div5WheelsOff" VARCHAR, - "Div5TailNum" VARCHAR + Year SMALLINT UNSIGNED, + Quarter TINYINT UNSIGNED, + Month TINYINT UNSIGNED, + DayofMonth TINYINT UNSIGNED, + DayOfWeek TINYINT UNSIGNED, + FlightDate DATE, + Reporting_Airline VARCHAR, + DOT_ID_Reporting_Airline INT, + IATA_CODE_Reporting_Airline VARCHAR, + Tail_Number VARCHAR, + Flight_Number_Reporting_Airline VARCHAR, + OriginAirportID INT, + OriginAirportSeqID INT, + OriginCityMarketID INT, + Origin VARCHAR, + OriginCityName VARCHAR, + OriginState VARCHAR, + OriginStateFips VARCHAR, + OriginStateName VARCHAR, + OriginWac INT, + DestAirportID INT, + DestAirportSeqID INT, + DestCityMarketID INT, + Dest VARCHAR, + DestCityName VARCHAR, + DestState VARCHAR, + DestStateFips VARCHAR, + DestStateName VARCHAR, + DestWac INT, + CRSDepTime INT, + DepTime INT, + DepDelay INT, + 
DepDelayMinutes INT, + DepDel15 INT, + DepartureDelayGroups VARCHAR, + DepTimeBlk VARCHAR, + TaxiOut INT, + WheelsOff INT, + WheelsOn INT, + TaxiIn INT, + CRSArrTime INT, + ArrTime INT, + ArrDelay INT, + ArrDelayMinutes INT, + ArrDel15 INT, + ArrivalDelayGroups INT, + ArrTimeBlk VARCHAR, + Cancelled TINYINT UNSIGNED, + CancellationCode VARCHAR, + Diverted TINYINT UNSIGNED, + CRSElapsedTime INT, + ActualElapsedTime INT, + AirTime INT, + Flights INT, + Distance INT, + DistanceGroup TINYINT UNSIGNED, + CarrierDelay INT, + WeatherDelay INT, + NASDelay INT, + SecurityDelay INT, + LateAircraftDelay INT, + FirstDepTime VARCHAR, + TotalAddGTime VARCHAR, + LongestAddGTime VARCHAR, + DivAirportLandings VARCHAR, + DivReachedDest VARCHAR, + DivActualElapsedTime VARCHAR, + DivArrDelay VARCHAR, + DivDistance VARCHAR, + Div1Airport VARCHAR, + Div1AirportID INT, + Div1AirportSeqID INT, + Div1WheelsOn VARCHAR, + Div1TotalGTime VARCHAR, + Div1LongestGTime VARCHAR, + Div1WheelsOff VARCHAR, + Div1TailNum VARCHAR, + Div2Airport VARCHAR, + Div2AirportID INT, + Div2AirportSeqID INT, + Div2WheelsOn VARCHAR, + Div2TotalGTime VARCHAR, + Div2LongestGTime VARCHAR, + Div2WheelsOff VARCHAR, + Div2TailNum VARCHAR, + Div3Airport VARCHAR, + Div3AirportID INT, + Div3AirportSeqID INT, + Div3WheelsOn VARCHAR, + Div3TotalGTime VARCHAR, + Div3LongestGTime VARCHAR, + Div3WheelsOff VARCHAR, + Div3TailNum VARCHAR, + Div4Airport VARCHAR, + Div4AirportID INT, + Div4AirportSeqID INT, + Div4WheelsOn VARCHAR, + Div4TotalGTime VARCHAR, + Div4LongestGTime VARCHAR, + Div4WheelsOff VARCHAR, + Div4TailNum VARCHAR, + Div5Airport VARCHAR, + Div5AirportID INT, + Div5AirportSeqID INT, + Div5WheelsOn VARCHAR, + Div5TotalGTime VARCHAR, + Div5LongestGTime VARCHAR, + Div5WheelsOff VARCHAR, + Div5TailNum VARCHAR ); diff --git a/tests/suites/1_stateful/ddl/variant_test.sql b/tests/suites/1_stateful/ddl/variant_test.sql index e7ab44ac5fdc0..13df94f7e8fbf 100644 --- a/tests/suites/1_stateful/ddl/variant_test.sql +++ b/tests/suites/1_stateful/ddl/variant_test.sql @@ -1,5 +1,5 @@ CREATE TABLE variant_test ( - "Id" Int, - "Var" Variant + Id Int, + Var Variant ); \ No newline at end of file From c21e0be583a2a214f03b2054b1fa1991ac3359d8 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 9 Aug 2022 13:42:22 +0800 Subject: [PATCH 09/59] feat(query): add tests --- common/formats/src/format_parquet.rs | 24 +++++++++---------- .../01_load_v2/01_0000_streaming_load.result | 1 + .../01_load_v2/01_0000_streaming_load.sh | 17 ++++++------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/common/formats/src/format_parquet.rs b/common/formats/src/format_parquet.rs index c6504b6c9152b..6cf93b866e767 100644 --- a/common/formats/src/format_parquet.rs +++ b/common/formats/src/format_parquet.rs @@ -29,6 +29,7 @@ use common_arrow::parquet::metadata::RowGroupMetaData; use common_arrow::parquet::read::read_metadata; use common_datablocks::DataBlock; use common_datavalues::remove_nullable; +use common_datavalues::DataField; use common_datavalues::DataSchema; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; @@ -53,7 +54,6 @@ impl InputState for ParquetInputState { pub struct ParquetInputFormat { schema: DataSchemaRef, - arrow_table_schema: ArrowSchema, } impl ParquetInputFormat { @@ -69,11 +69,7 @@ impl ParquetInputFormat { } pub fn try_create(_name: &str, schema: DataSchemaRef) -> Result> { - let arrow_table_schema = schema.to_arrow(); - Ok(Arc::new(ParquetInputFormat { - schema, - arrow_table_schema, - })) + 
Ok(Arc::new(ParquetInputFormat { schema })) } fn read_meta_data(cursor: &mut Cursor<&Vec>) -> Result { @@ -130,22 +126,25 @@ impl InputFormat for ParquetInputFormat { let mut cursor = Cursor::new(&split.buf); let parquet_metadata = Self::read_meta_data(&mut cursor)?; let infer_schema = read::infer_schema(&parquet_metadata)?; - let actually_schema = DataSchema::from(&infer_schema); + let mut read_fields = Vec::with_capacity(self.schema.num_fields()); for f in self.schema.fields().iter() { - if let Some(m) = actually_schema - .fields() + if let Some(m) = infer_schema + .fields .iter() - .filter(|c| c.name().eq_ignore_ascii_case(f.name())) + .filter(|c| c.name.eq_ignore_ascii_case(f.name())) .last() { - if remove_nullable(m.data_type()) != remove_nullable(f.data_type()) { + let tf = DataField::from(m); + if remove_nullable(tf.data_type()) != remove_nullable(f.data_type()) { let diff = Diff::from_debug(f, m, "expected_field", "infer_field"); return Err(ErrorCode::ParquetError(format!( "parquet schema mismatch, differ: {}", diff ))); } + + read_fields.push(m.clone()); } else { return Err(ErrorCode::ParquetError(format!( "schema field size mismatch, expected to find column: {}", @@ -154,11 +153,10 @@ impl InputFormat for ParquetInputFormat { } } - let fields = &self.arrow_table_schema.fields; let mut data_blocks = Vec::with_capacity(parquet_metadata.row_groups.len()); for row_group in &parquet_metadata.row_groups { - let arrays = Self::read_columns(fields, row_group, &mut cursor)?; + let arrays = Self::read_columns(&read_fields, row_group, &mut cursor)?; let chunk = Self::deserialize(row_group.num_rows() as usize, arrays)?; data_blocks.push(DataBlock::from_chunk(&self.schema, &chunk)?); } diff --git a/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.result b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.result index d8e38b2d7e80a..bc376559aa696 100644 --- a/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.result +++ b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.result @@ -4,4 +4,5 @@ 199 2020.0 769 199 2020.0 769 199 2020.0 769 +199 2020.0 769 1 diff --git a/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh index 0d98e56f48b7a..a72bbb05ef766 100755 --- a/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh +++ b/tests/suites/1_stateful/01_load_v2/01_0000_streaming_load.sh @@ -67,12 +67,8 @@ echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_streaming_load;" | echo "truncate table ontime_streaming_load" | $MYSQL_CLIENT_CONNECT -# load parquet with mismatch schema -cat $CURDIR/../ddl/ontime.sql | sed 's/ontime/ontime_test1/g' | sed 's/DATE/VARCHAR/g' | $MYSQL_CLIENT_CONNECT -curl -s -H "insert_sql:insert into ontime_test1 format Parquet" -H "skip_header:1" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" | grep -c 'Code: 1024' - # load parquet with less schema -echo 'CREATE TABLE ontime_test2 +echo 'CREATE TABLE ontime_less ( Year SMALLINT UNSIGNED, Quarter TINYINT UNSIGNED, @@ -81,9 +77,14 @@ echo 'CREATE TABLE ontime_test2 DayOfWeek TINYINT UNSIGNED )' | $MYSQL_CLIENT_CONNECT -curl -s -H "insert_sql:insert into ontime_test2 format Parquet" -H "skip_header:1" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" | grep -c 'Code: 1024' -echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_test2;" | $MYSQL_CLIENT_CONNECT +curl -s 
-H "insert_sql:insert into ontime_less format Parquet" -H "skip_header:1" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" > /dev/null 2>&1 +echo "select count(1), avg(Year), sum(DayOfWeek) from ontime_less;" | $MYSQL_CLIENT_CONNECT + +# load parquet with mismatch schema +cat $CURDIR/../ddl/ontime.sql | sed 's/ontime/ontime_test_mismatch/g' | sed 's/DATE/VARCHAR/g' | $MYSQL_CLIENT_CONNECT +curl -s -H "insert_sql:insert into ontime_test_mismatch format Parquet" -H "skip_header:1" -F "upload=@/tmp/ontime_200.parquet" -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" | grep -c 'Code: 1024' echo "drop table ontime_streaming_load;" | $MYSQL_CLIENT_CONNECT -echo "drop table ontime_test1;" | $MYSQL_CLIENT_CONNECT +echo "drop table ontime_test_mismatch;" | $MYSQL_CLIENT_CONNECT +echo "drop table ontime_less;" | $MYSQL_CLIENT_CONNECT From cea41e945fe25c19969abc3def2092889d20853e Mon Sep 17 00:00:00 2001 From: lichuang Date: Tue, 9 Aug 2022 16:38:44 +0800 Subject: [PATCH 10/59] refactor by comment --- common/ast/src/ast/statements/share.rs | 8 ++++---- common/ast/tests/it/testdata/statement.txt | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/common/ast/src/ast/statements/share.rs b/common/ast/src/ast/statements/share.rs index f53da8a10903a..03010423dca47 100644 --- a/common/ast/src/ast/statements/share.rs +++ b/common/ast/src/ast/statements/share.rs @@ -113,18 +113,18 @@ impl Display for AlterShareAccountsStmt<'_> { } write!(f, "{}", self.share)?; if self.add { - write!(f, " ADD TENANTS ")?; + write!(f, " ADD TENANTS = ")?; } else { - write!(f, " REMOVE TENANTS ")?; + write!(f, " REMOVE TENANTS = ")?; } let mut first = true; for account in self.tenants.iter() { if !first { - write!(f, " , ")?; + write!(f, ",")?; } else { first = false; } - write!(f, " {} ", account)?; + write!(f, "{}", account)?; } Ok(()) diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index 49841d9e87756..cbeef09a50eda 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -6093,7 +6093,7 @@ RevokeShareObject( ---------- Input ---------- ALTER SHARE a ADD TENANTS = b,c; ---------- Output --------- -ALTER SHARE a ADD TENANTS b , c +ALTER SHARE a ADD TENANTS = b,c ---------- AST ------------ AlterShareAccounts( AlterShareAccountsStmt { @@ -6123,7 +6123,7 @@ AlterShareAccounts( ---------- Input ---------- ALTER SHARE IF EXISTS a ADD TENANTS = b,c; ---------- Output --------- -ALTER SHARE IF EXISTS a ADD TENANTS b , c +ALTER SHARE IF EXISTS a ADD TENANTS = b,c ---------- AST ------------ AlterShareAccounts( AlterShareAccountsStmt { @@ -6153,7 +6153,7 @@ AlterShareAccounts( ---------- Input ---------- ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c; ---------- Output --------- -ALTER SHARE IF EXISTS a REMOVE TENANTS b , c +ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c ---------- AST ------------ AlterShareAccounts( AlterShareAccountsStmt { From 86a7c4844721eedcbd7499bc3990151698d32140 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Tue, 9 Aug 2022 16:44:05 +0800 Subject: [PATCH 11/59] feat(query): fix tests --- common/datavalues/src/columns/array/mod.rs | 28 ++++++++++++++----- common/formats/src/format_parquet.rs | 3 -- .../00_copy/00_0000_copy_from_stage.sh | 4 +-- tests/suites/1_stateful/ddl/hits.sql | 2 +- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git 
a/common/datavalues/src/columns/array/mod.rs b/common/datavalues/src/columns/array/mod.rs index aa9faa5cabf04..047d91f111e49 100644 --- a/common/datavalues/src/columns/array/mod.rs +++ b/common/datavalues/src/columns/array/mod.rs @@ -16,6 +16,8 @@ use std::sync::Arc; use common_arrow::arrow::array::*; use common_arrow::arrow::buffer::Buffer; +use common_arrow::arrow::compute::cast::CastOptions; +use common_arrow::arrow::compute::cast::{self}; use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::arrow::types::Index; use common_arrow::ArrayRef; @@ -62,13 +64,25 @@ impl ArrayColumn { } pub fn from_arrow_array(array: &dyn Array) -> Self { - Self::new( - array - .as_any() - .downcast_ref::() - .unwrap() - .clone(), - ) + let cast_options = CastOptions { + wrapped: true, + partial: true, + }; + + match array.data_type() { + ArrowType::List(f) => { + let array = cast::cast(array, &ArrowType::LargeList(f.clone()), cast_options) + .expect("list to large list cast should be ok"); + Self::from_arrow_array(array.as_ref()) + } + _ => Self::new( + array + .as_any() + .downcast_ref::() + .unwrap() + .clone(), + ), + } } pub fn from_data(data_type: DataTypeImpl, offsets: Buffer, values: ColumnRef) -> Self { diff --git a/common/formats/src/format_parquet.rs b/common/formats/src/format_parquet.rs index 6cf93b866e767..b606b0d4305b7 100644 --- a/common/formats/src/format_parquet.rs +++ b/common/formats/src/format_parquet.rs @@ -19,7 +19,6 @@ use std::sync::Arc; use common_arrow::arrow::array::Array; use common_arrow::arrow::chunk::Chunk; use common_arrow::arrow::datatypes::Field; -use common_arrow::arrow::datatypes::Schema as ArrowSchema; use common_arrow::arrow::io::parquet::read; use common_arrow::arrow::io::parquet::read::read_columns_many; use common_arrow::arrow::io::parquet::read::ArrayIter; @@ -30,7 +29,6 @@ use common_arrow::parquet::read::read_metadata; use common_datablocks::DataBlock; use common_datavalues::remove_nullable; use common_datavalues::DataField; -use common_datavalues::DataSchema; use common_datavalues::DataSchemaRef; use common_exception::ErrorCode; use common_exception::Result; @@ -154,7 +152,6 @@ impl InputFormat for ParquetInputFormat { } let mut data_blocks = Vec::with_capacity(parquet_metadata.row_groups.len()); - for row_group in &parquet_metadata.row_groups { let arrays = Self::read_columns(&read_fields, row_group, &mut cursor)?; let chunk = Self::deserialize(row_group.num_rows() as usize, arrays)?; diff --git a/tests/suites/1_stateful/00_copy/00_0000_copy_from_stage.sh b/tests/suites/1_stateful/00_copy/00_0000_copy_from_stage.sh index c2efd8ce91193..ad9d97d57f74e 100755 --- a/tests/suites/1_stateful/00_copy/00_0000_copy_from_stage.sh +++ b/tests/suites/1_stateful/00_copy/00_0000_copy_from_stage.sh @@ -38,7 +38,7 @@ copy_from_stage_cases=( for i in "${copy_from_stage_cases[@]}"; do echo "$i" | $MYSQL_CLIENT_CONNECT - echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime200" | $MYSQL_CLIENT_CONNECT + echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT done @@ -61,7 +61,7 @@ copy_from_named_external_stage_cases=( for i in "${copy_from_named_external_stage_cases[@]}"; do echo "$i" | $MYSQL_CLIENT_CONNECT - echo "select count(1), avg(\"Year\"), sum(\"DayOfWeek\") from ontime200" | $MYSQL_CLIENT_CONNECT + echo "select count(1), avg(Year), sum(DayOfWeek) from ontime200" | $MYSQL_CLIENT_CONNECT echo "truncate table ontime200" | $MYSQL_CLIENT_CONNECT done 
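PATCH 11's `from_arrow_array` change above normalizes incoming arrow arrays by casting 32-bit-offset `List` arrays to `LargeList` before the single `ListArray<i64>` downcast, so later code only ever sees one list flavour. A condensed sketch of that pattern, assuming the same `common_arrow::arrow` re-exports the patch imports and keeping the `expect`-style error handling of the original:

```rust
use common_arrow::arrow::array::{Array, ListArray};
use common_arrow::arrow::compute::cast::{self, CastOptions};
use common_arrow::arrow::datatypes::DataType as ArrowType;

/// Return the array as a `ListArray<i64>`, widening `List` (i32 offsets)
/// to `LargeList` first; panics if the input is not a list array at all.
fn as_large_list(array: &dyn Array) -> ListArray<i64> {
    let cast_options = CastOptions {
        wrapped: true,
        partial: true,
    };
    match array.data_type() {
        // i32-offset list: cast to LargeList, then take the i64 path below.
        ArrowType::List(field) => {
            let widened = cast::cast(array, &ArrowType::LargeList(field.clone()), cast_options)
                .expect("list to large list cast should be ok");
            as_large_list(widened.as_ref())
        }
        // Already LargeList (or anything downcastable to it): clone it out.
        _ => array
            .as_any()
            .downcast_ref::<ListArray<i64>>()
            .expect("expected a large list array")
            .clone(),
    }
}
```

Widening eagerly is presumably what lets the surrounding column code keep a single `LargeListArray` representation instead of branching on both offset widths.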
diff --git a/tests/suites/1_stateful/ddl/hits.sql b/tests/suites/1_stateful/ddl/hits.sql index 8e3fc09cf22f8..b446288b409e5 100644 --- a/tests/suites/1_stateful/ddl/hits.sql +++ b/tests/suites/1_stateful/ddl/hits.sql @@ -106,4 +106,4 @@ CREATE TABLE hits URLHash BIGINT NOT NULL, CLID INTEGER NOT NULL ) -CLUSTER BY (CounterID, EventDate, UserID, EventTime, WatchID); \ No newline at end of file +CLUSTER BY (CounterID, EventDate, UserID, EventTime, WatchID); From 3d604958ba751d62a16d7c6aec6bb46ebd47acc3 Mon Sep 17 00:00:00 2001 From: lichuang Date: Tue, 9 Aug 2022 17:45:13 +0800 Subject: [PATCH 12/59] refactor by comment --- common/ast/src/ast/statements/share.rs | 23 ++++++++----------- common/ast/src/ast/statements/statement.rs | 2 +- common/ast/src/parser/statement.rs | 8 +++---- common/ast/tests/it/testdata/statement.txt | 12 +++++----- .../interpreter_share_alter_tenants.rs | 2 +- query/src/sql/planner/binder/ddl/share.rs | 19 +++++++-------- query/src/sql/planner/plans/share.rs | 2 +- 7 files changed, 33 insertions(+), 35 deletions(-) diff --git a/common/ast/src/ast/statements/share.rs b/common/ast/src/ast/statements/share.rs index 03010423dca47..952207291939c 100644 --- a/common/ast/src/ast/statements/share.rs +++ b/common/ast/src/ast/statements/share.rs @@ -17,6 +17,7 @@ use std::fmt::Formatter; use common_meta_app::share::ShareGrantObjectName; use common_meta_app::share::ShareGrantObjectPrivilege; +use itertools::Itertools; use crate::ast::Identifier; @@ -98,34 +99,30 @@ impl Display for RevokeShareObjectStmt<'_> { } #[derive(Debug, Clone, PartialEq, Eq)] -pub struct AlterShareAccountsStmt<'a> { +pub struct AlterShareTenantsStmt<'a> { pub share: Identifier<'a>, pub if_exists: bool, pub tenants: Vec>, - pub add: bool, + pub is_add: bool, } -impl Display for AlterShareAccountsStmt<'_> { +impl Display for AlterShareTenantsStmt<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "ALTER SHARE ")?; if self.if_exists { write!(f, "IF EXISTS ")?; } write!(f, "{}", self.share)?; - if self.add { + if self.is_add { write!(f, " ADD TENANTS = ")?; } else { write!(f, " REMOVE TENANTS = ")?; } - let mut first = true; - for account in self.tenants.iter() { - if !first { - write!(f, ",")?; - } else { - first = false; - } - write!(f, "{}", account)?; - } + write!( + f, + "{}", + self.tenants.iter().map(|v| v.to_string()).join(",") + )?; Ok(()) } diff --git a/common/ast/src/ast/statements/statement.rs b/common/ast/src/ast/statements/statement.rs index eef749eea4879..dce573b9ad341 100644 --- a/common/ast/src/ast/statements/statement.rs +++ b/common/ast/src/ast/statements/statement.rs @@ -163,7 +163,7 @@ pub enum Statement<'a> { DropShare(DropShareStmt<'a>), GrantShareObject(GrantShareObjectStmt<'a>), RevokeShareObject(RevokeShareObjectStmt<'a>), - AlterShareAccounts(AlterShareAccountsStmt<'a>), + AlterShareAccounts(AlterShareTenantsStmt<'a>), } #[derive(Debug, Clone, PartialEq)] diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index 8c91ea8a50359..c0649d2c2567d 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -798,11 +798,11 @@ pub fn statement(i: Input) -> IResult { rule! { ALTER ~ SHARE ~ (IF ~ EXISTS )? 
~ #ident ~ #alter_add_share_accounts ~ TENANTS ~ Eq ~ #comma_separated_list1(ident) }, - |(_, _, opt_if_exists, share, add, _, _, tenants)| { - Statement::AlterShareAccounts(AlterShareAccountsStmt { + |(_, _, opt_if_exists, share, is_add, _, _, tenants)| { + Statement::AlterShareAccounts(AlterShareTenantsStmt { share, if_exists: opt_if_exists.is_some(), - add, + is_add, tenants, }) }, @@ -899,7 +899,7 @@ pub fn statement(i: Input) -> IResult { | #drop_share: "`DROP SHARE [IF EXISTS] `" | #grant_share_object: "`GRANT { USAGE | SELECT | REFERENCE_USAGE } ON { DATABASE db | TABLE db.table } TO SHARE `" | #revoke_share_object: "`REVOKE { USAGE | SELECT | REFERENCE_USAGE } ON { DATABASE db | TABLE db.table } FROM SHARE `" - | #alter_share_accounts: "`ALTER SHARE [IF EXISTS] {ADD | REMOVE} TENANTS = tenant [, tenant, ...]`" + | #alter_share_accounts: "`ALTER SHARE [IF EXISTS] { ADD | REMOVE } TENANTS = tenant [, tenant, ...]`" ), )); diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index cbeef09a50eda..5a526d9bded48 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -6096,7 +6096,7 @@ ALTER SHARE a ADD TENANTS = b,c; ALTER SHARE a ADD TENANTS = b,c ---------- AST ------------ AlterShareAccounts( - AlterShareAccountsStmt { + AlterShareTenantsStmt { share: Identifier { name: "a", quote: None, @@ -6115,7 +6115,7 @@ AlterShareAccounts( span: Ident(30..31), }, ], - add: true, + is_add: true, }, ) @@ -6126,7 +6126,7 @@ ALTER SHARE IF EXISTS a ADD TENANTS = b,c; ALTER SHARE IF EXISTS a ADD TENANTS = b,c ---------- AST ------------ AlterShareAccounts( - AlterShareAccountsStmt { + AlterShareTenantsStmt { share: Identifier { name: "a", quote: None, @@ -6145,7 +6145,7 @@ AlterShareAccounts( span: Ident(40..41), }, ], - add: true, + is_add: true, }, ) @@ -6156,7 +6156,7 @@ ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c; ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c ---------- AST ------------ AlterShareAccounts( - AlterShareAccountsStmt { + AlterShareTenantsStmt { share: Identifier { name: "a", quote: None, @@ -6175,7 +6175,7 @@ AlterShareAccounts( span: Ident(43..44), }, ], - add: false, + is_add: false, }, ) diff --git a/query/src/interpreters/interpreter_share_alter_tenants.rs b/query/src/interpreters/interpreter_share_alter_tenants.rs index f73fa77f0c6c7..3c05e26c46f3e 100644 --- a/query/src/interpreters/interpreter_share_alter_tenants.rs +++ b/query/src/interpreters/interpreter_share_alter_tenants.rs @@ -49,7 +49,7 @@ impl Interpreter for AlterShareTenantsInterpreter { let tenant = self.ctx.get_tenant(); let user_mgr = self.ctx.get_user_manager(); let meta_api = user_mgr.get_meta_store_client(); - if self.plan.add { + if self.plan.is_add { let req = AddShareAccountsReq { share_name: ShareNameIdent { tenant, diff --git a/query/src/sql/planner/binder/ddl/share.rs b/query/src/sql/planner/binder/ddl/share.rs index 7799b483b42fc..9a7c75a0a2d7a 100644 --- a/query/src/sql/planner/binder/ddl/share.rs +++ b/query/src/sql/planner/binder/ddl/share.rs @@ -14,6 +14,7 @@ use common_ast::ast::*; use common_exception::Result; +use itertools::Itertools; use crate::sessions::TableContext; use crate::sql::binder::Binder; @@ -105,26 +106,26 @@ impl<'a> Binder { pub(in crate::sql::planner::binder) async fn bind_alter_share_accounts( &mut self, - stmt: &AlterShareAccountsStmt<'a>, + stmt: &AlterShareTenantsStmt<'a>, ) -> Result { - let AlterShareAccountsStmt { + let AlterShareTenantsStmt { share, if_exists, tenants, - 
add, + is_add, } = stmt; let share = normalize_identifier(share, &self.name_resolution_ctx).name; - let mut accounts = vec![]; - for tenant in tenants { - accounts.push(tenant.to_string()); - } let plan = AlterShareAccountsPlan { share, if_exists: *if_exists, - add: *add, - accounts, + is_add: *is_add, + accounts: tenants + .iter() + .map(|v| v.to_string()) + .into_iter() + .collect_vec(), }; Ok(Plan::AlterShareAccounts(Box::new(plan))) } diff --git a/query/src/sql/planner/plans/share.rs b/query/src/sql/planner/plans/share.rs index a18980d50016b..5a836baed7715 100644 --- a/query/src/sql/planner/plans/share.rs +++ b/query/src/sql/planner/plans/share.rs @@ -112,7 +112,7 @@ pub struct AlterShareAccountsPlan { pub share: String, pub if_exists: bool, pub accounts: Vec, - pub add: bool, + pub is_add: bool, } impl AlterShareAccountsPlan { From c4497c7c77e9d548eaae9c4090e632e19299ead2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Tue, 9 Aug 2022 18:08:45 +0800 Subject: [PATCH 13/59] refactor(proto-conv): trait FromToPB use associated type instead of type parameter Change trait signature `FromToProto` to ```rust trait FromToProto { type PB; } ``` When impl `FromToProto` for a rust type, there should be only one protobuf type that is convert from and to it. I.e, the protobuf type is determined by rust type. Thus `PB` should be a associated type. Otherwise, there could be two protobuf types bounded to one rust type(e.g., `impl FromToProto for T` and `impl FromToProto for T`), which is potentially invalid: when converting rust type to protobuf type, it does not know which it should be converted to. --- common/management/src/serde/pb_serde.rs | 14 ++--- common/meta/api/src/kv_api_utils.rs | 22 ++++---- common/meta/api/src/schema_api_test_suite.rs | 6 +-- .../src/config_from_to_protobuf_impl.rs | 8 ++- .../src/data_from_to_protobuf_impl.rs | 40 +++++++++----- .../src/database_from_to_protobuf_impl.rs | 9 ++-- common/proto-conv/src/from_to_protobuf.rs | 21 +++++--- .../src/share_from_to_protobuf_impl.rs | 15 ++++-- .../src/table_from_to_protobuf_impl.rs | 21 +++++--- .../src/user_from_to_protobuf_impl.rs | 54 ++++++++++++------- 10 files changed, 136 insertions(+), 74 deletions(-) diff --git a/common/management/src/serde/pb_serde.rs b/common/management/src/serde/pb_serde.rs index 16922cb0b9a95..5feea5629ccd7 100644 --- a/common/management/src/serde/pb_serde.rs +++ b/common/management/src/serde/pb_serde.rs @@ -19,12 +19,14 @@ use common_exception::Result; use common_exception::ToErrorCode; use common_proto_conv::FromToProto; -pub fn serialize_struct( - value: &impl FromToProto, +pub fn serialize_struct( + value: &T, err_code_fn: ErrFn, context_fn: CtxFn, ) -> Result> where + T: FromToProto + 'static, + T::PB: common_protos::prost::Message + Default, ErrFn: FnOnce(String) -> ErrorCode + std::marker::Copy, D: Display, CtxFn: FnOnce() -> D + std::marker::Copy, @@ -35,19 +37,19 @@ where Ok(buf) } -pub fn deserialize_struct( +pub fn deserialize_struct( buf: &[u8], err_code_fn: ErrFn, context_fn: CtxFn, ) -> Result where - PB: common_protos::prost::Message + Default, - T: FromToProto, + T: FromToProto, + T::PB: common_protos::prost::Message + Default, ErrFn: FnOnce(String) -> ErrorCode + std::marker::Copy, D: Display, CtxFn: FnOnce() -> D + std::marker::Copy, { - let p: PB = + let p: T::PB = common_protos::prost::Message::decode(buf).map_err_to_code(err_code_fn, context_fn)?; let v: T = FromToProto::from_pb(p).map_err_to_code(err_code_fn, context_fn)?; diff --git 
a/common/meta/api/src/kv_api_utils.rs b/common/meta/api/src/kv_api_utils.rs index cbd83f4cd14f5..ea1b8a4b85660 100644 --- a/common/meta/api/src/kv_api_utils.rs +++ b/common/meta/api/src/kv_api_utils.rs @@ -65,14 +65,14 @@ pub async fn get_u64_value( /// Get a struct value. /// /// It returns seq number and the data. -pub async fn get_struct_value( +pub async fn get_struct_value( kv_api: &(impl KVApi + ?Sized), k: &K, ) -> Result<(u64, Option), MetaError> where K: KVApiKey, - PB: common_protos::prost::Message + Default, - T: FromToProto, + T: FromToProto, + T::PB: common_protos::prost::Message + Default, { let res = kv_api.get_kv(&k.to_key()).await?; @@ -112,21 +112,23 @@ pub fn serialize_u64(value: u64) -> Result, MetaError> { Ok(v) } -pub fn serialize_struct( - value: &impl FromToProto, -) -> Result, MetaError> { +pub fn serialize_struct(value: &T) -> Result, MetaError> +where + T: FromToProto + 'static, + T::PB: common_protos::prost::Message, +{ let p = value.to_pb().map_err(meta_encode_err)?; let mut buf = vec![]; common_protos::prost::Message::encode(&p, &mut buf).map_err(meta_encode_err)?; Ok(buf) } -pub fn deserialize_struct(buf: &[u8]) -> Result +pub fn deserialize_struct(buf: &[u8]) -> Result where - PB: common_protos::prost::Message + Default, - T: FromToProto, + T: FromToProto, + T::PB: common_protos::prost::Message + Default, { - let p: PB = common_protos::prost::Message::decode(buf).map_err(meta_encode_err)?; + let p: T::PB = common_protos::prost::Message::decode(buf).map_err(meta_encode_err)?; let v: T = FromToProto::from_pb(p).map_err(meta_encode_err)?; Ok(v) diff --git a/common/meta/api/src/schema_api_test_suite.rs b/common/meta/api/src/schema_api_test_suite.rs index 559f6a15b591d..feaff9fdd6b63 100644 --- a/common/meta/api/src/schema_api_test_suite.rs +++ b/common/meta/api/src/schema_api_test_suite.rs @@ -193,13 +193,13 @@ async fn delete_test_data( Ok(()) } -async fn get_test_data( +async fn get_test_data( kv_api: &(impl KVApi + ?Sized), key: &impl KVApiKey, ) -> Result where - PB: common_protos::prost::Message + Default, - T: FromToProto, + T: FromToProto, + T::PB: common_protos::prost::Message + Default, { let res = kv_api.get_kv(&key.to_key()).await?; if let Some(res) = res { diff --git a/common/proto-conv/src/config_from_to_protobuf_impl.rs b/common/proto-conv/src/config_from_to_protobuf_impl.rs index ec82fa7bb46af..7b10f5c3ceaf3 100644 --- a/common/proto-conv/src/config_from_to_protobuf_impl.rs +++ b/common/proto-conv/src/config_from_to_protobuf_impl.rs @@ -22,7 +22,9 @@ use crate::Incompatible; use crate::MIN_COMPATIBLE_VER; use crate::VER; -impl FromToProto for StorageS3Config { +impl FromToProto for StorageS3Config { + type PB = pb::S3StorageConfig; + fn from_pb(p: pb::S3StorageConfig) -> Result where Self: Sized { check_ver(p.version, p.min_compatible)?; @@ -57,7 +59,9 @@ impl FromToProto for StorageS3Config { } } -impl FromToProto for StorageFsConfig { +impl FromToProto for StorageFsConfig { + type PB = pb::FsStorageConfig; + fn from_pb(p: pb::FsStorageConfig) -> Result where Self: Sized { check_ver(p.version, p.min_compatible)?; diff --git a/common/proto-conv/src/data_from_to_protobuf_impl.rs b/common/proto-conv/src/data_from_to_protobuf_impl.rs index 76f280bd935ec..4b1a045291ca7 100644 --- a/common/proto-conv/src/data_from_to_protobuf_impl.rs +++ b/common/proto-conv/src/data_from_to_protobuf_impl.rs @@ -30,7 +30,8 @@ use crate::Incompatible; use crate::MIN_COMPATIBLE_VER; use crate::VER; -impl FromToProto for dv::DataSchema { +impl FromToProto for 
dv::DataSchema { + type PB = pb::DataSchema; fn from_pb(p: pb::DataSchema) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -59,7 +60,8 @@ impl FromToProto for dv::DataSchema { } } -impl FromToProto for dv::DataField { +impl FromToProto for dv::DataField { + type PB = pb::DataField; fn from_pb(p: pb::DataField) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -85,7 +87,8 @@ impl FromToProto for dv::DataField { } } -impl FromToProto for dv::DataTypeImpl { +impl FromToProto for dv::DataTypeImpl { + type PB = pb::DataType; fn from_pb(p: pb::DataType) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -322,7 +325,8 @@ impl FromToProto for dv::DataTypeImpl { } } -impl FromToProto for dv::NullableType { +impl FromToProto for dv::NullableType { + type PB = pb::NullableType; fn from_pb(p: pb::NullableType) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -350,7 +354,8 @@ impl FromToProto for dv::NullableType { } } -impl FromToProto for dv::TimestampType { +impl FromToProto for dv::TimestampType { + type PB = pb::Timestamp; fn from_pb(p: pb::Timestamp) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -370,7 +375,8 @@ impl FromToProto for dv::TimestampType { } } -impl FromToProto for dv::StructType { +impl FromToProto for dv::StructType { + type PB = pb::Struct; fn from_pb(p: pb::Struct) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -414,7 +420,8 @@ impl FromToProto for dv::StructType { } } -impl FromToProto for dv::ArrayType { +impl FromToProto for dv::ArrayType { + type PB = pb::Array; fn from_pb(p: pb::Array) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -442,7 +449,8 @@ impl FromToProto for dv::ArrayType { } } -impl FromToProto for dv::VariantArrayType { +impl FromToProto for dv::VariantArrayType { + type PB = pb::VariantArray; fn from_pb(p: pb::VariantArray) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -459,7 +467,8 @@ impl FromToProto for dv::VariantArrayType { } } -impl FromToProto for dv::VariantObjectType { +impl FromToProto for dv::VariantObjectType { + type PB = pb::VariantObject; fn from_pb(p: pb::VariantObject) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -476,7 +485,8 @@ impl FromToProto for dv::VariantObjectType { } } -impl FromToProto for dv::IntervalKind { +impl FromToProto for dv::IntervalKind { + type PB = pb::IntervalKind; fn from_pb(p: pb::IntervalKind) -> Result where Self: Sized { let dv_kind = match p { @@ -507,7 +517,8 @@ impl FromToProto for dv::IntervalKind { Ok(pb_kind) } } -impl FromToProto for dv::IntervalType { +impl FromToProto for dv::IntervalType { + type PB = pb::IntervalType; fn from_pb(p: pb::IntervalType) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -532,7 +543,8 @@ impl FromToProto for dv::IntervalType { } } -impl FromToProto for dv::VariantType { +impl FromToProto for dv::VariantType { + type PB = pb::Variant; fn from_pb(p: pb::Variant) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -549,7 +561,9 @@ impl FromToProto for dv::VariantType { } } -impl FromToProto for DateTime { +impl FromToProto for DateTime { + type PB = String; + fn from_pb(p: String) -> Result { let v = DateTime::::from_str(&p).map_err(|e| Incompatible { reason: format!("DateTime error: {}", e), diff --git a/common/proto-conv/src/database_from_to_protobuf_impl.rs b/common/proto-conv/src/database_from_to_protobuf_impl.rs index d0805806f883f..1d4217e922525 100644 --- 
a/common/proto-conv/src/database_from_to_protobuf_impl.rs +++ b/common/proto-conv/src/database_from_to_protobuf_impl.rs @@ -28,7 +28,8 @@ use crate::Incompatible; use crate::MIN_COMPATIBLE_VER; use crate::VER; -impl FromToProto for mt::DatabaseNameIdent { +impl FromToProto for mt::DatabaseNameIdent { + type PB = pb::DatabaseNameIdent; fn from_pb(p: pb::DatabaseNameIdent) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -50,7 +51,8 @@ impl FromToProto for mt::DatabaseNameIdent { } } -impl FromToProto for mt::DatabaseMeta { +impl FromToProto for mt::DatabaseMeta { + type PB = pb::DatabaseMeta; fn from_pb(p: pb::DatabaseMeta) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -90,7 +92,8 @@ impl FromToProto for mt::DatabaseMeta { } } -impl FromToProto for mt::DbIdList { +impl FromToProto for mt::DbIdList { + type PB = pb::DbIdList; fn from_pb(p: pb::DbIdList) -> Result { check_ver(p.ver, p.min_compatible)?; diff --git a/common/proto-conv/src/from_to_protobuf.rs b/common/proto-conv/src/from_to_protobuf.rs index d5d5d8b420e55..d02343342835d 100644 --- a/common/proto-conv/src/from_to_protobuf.rs +++ b/common/proto-conv/src/from_to_protobuf.rs @@ -15,11 +15,16 @@ use std::sync::Arc; /// Defines API to convert from/to protobuf meta type. -pub trait FromToProto { - fn from_pb(p: PB) -> Result +pub trait FromToProto { + /// The corresponding protobuf defined type. + type PB; + + /// Convert to rust type from protobuf type. + fn from_pb(p: Self::PB) -> Result where Self: Sized; - fn to_pb(&self) -> Result; + /// Convert from rust type to protobuf type. + fn to_pb(&self) -> Result; } #[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)] @@ -28,15 +33,17 @@ pub struct Incompatible { pub reason: String, } -impl FromToProto for Arc -where T: FromToProto +impl FromToProto for Arc +where T: FromToProto { - fn from_pb(p: PB) -> Result + type PB = T::PB; + + fn from_pb(p: Self::PB) -> Result where Self: Sized { Ok(Arc::new(T::from_pb(p)?)) } - fn to_pb(&self) -> Result { + fn to_pb(&self) -> Result { let s = self.as_ref(); s.to_pb() } diff --git a/common/proto-conv/src/share_from_to_protobuf_impl.rs b/common/proto-conv/src/share_from_to_protobuf_impl.rs index d782afe0e35e4..5fbd78f7e9cf2 100644 --- a/common/proto-conv/src/share_from_to_protobuf_impl.rs +++ b/common/proto-conv/src/share_from_to_protobuf_impl.rs @@ -30,7 +30,8 @@ use crate::Incompatible; use crate::MIN_COMPATIBLE_VER; use crate::VER; -impl FromToProto for mt::ShareNameIdent { +impl FromToProto for mt::ShareNameIdent { + type PB = pb::ShareNameIdent; fn from_pb(p: pb::ShareNameIdent) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -52,7 +53,8 @@ impl FromToProto for mt::ShareNameIdent { } } -impl FromToProto for mt::ShareGrantObject { +impl FromToProto for mt::ShareGrantObject { + type PB = pb::ShareGrantObject; fn from_pb(p: pb::ShareGrantObject) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -88,7 +90,8 @@ impl FromToProto for mt::ShareGrantObject { } } -impl FromToProto for mt::ShareGrantEntry { +impl FromToProto for mt::ShareGrantEntry { + type PB = pb::ShareGrantEntry; fn from_pb(p: pb::ShareGrantEntry) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -127,7 +130,8 @@ impl FromToProto for mt::ShareGrantEntry { } } -impl FromToProto for mt::ShareMeta { +impl FromToProto for mt::ShareMeta { + type PB = pb::ShareMeta; fn from_pb(p: pb::ShareMeta) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -177,7 +181,8 @@ impl FromToProto for mt::ShareMeta { } } -impl FromToProto for 
mt::ShareAccountMeta { +impl FromToProto for mt::ShareAccountMeta { + type PB = pb::ShareAccountMeta; fn from_pb(p: pb::ShareAccountMeta) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; diff --git a/common/proto-conv/src/table_from_to_protobuf_impl.rs b/common/proto-conv/src/table_from_to_protobuf_impl.rs index f86fa3c10f930..261c79295f72f 100644 --- a/common/proto-conv/src/table_from_to_protobuf_impl.rs +++ b/common/proto-conv/src/table_from_to_protobuf_impl.rs @@ -29,7 +29,8 @@ use crate::Incompatible; use crate::MIN_COMPATIBLE_VER; use crate::VER; -impl FromToProto for mt::TableInfo { +impl FromToProto for mt::TableInfo { + type PB = pb::TableInfo; fn from_pb(p: pb::TableInfo) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -65,7 +66,8 @@ impl FromToProto for mt::TableInfo { } } -impl FromToProto for mt::TableNameIdent { +impl FromToProto for mt::TableNameIdent { + type PB = pb::TableNameIdent; fn from_pb(p: pb::TableNameIdent) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -89,7 +91,8 @@ impl FromToProto for mt::TableNameIdent { } } -impl FromToProto for mt::DBIdTableName { +impl FromToProto for mt::DBIdTableName { + type PB = pb::DbIdTableName; fn from_pb(p: pb::DbIdTableName) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -111,7 +114,8 @@ impl FromToProto for mt::DBIdTableName { } } -impl FromToProto for mt::TableIdent { +impl FromToProto for mt::TableIdent { + type PB = pb::TableIdent; fn from_pb(p: pb::TableIdent) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -134,7 +138,8 @@ impl FromToProto for mt::TableIdent { } } -impl FromToProto for mt::TableMeta { +impl FromToProto for mt::TableMeta { + type PB = pb::TableMeta; fn from_pb(p: pb::TableMeta) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -197,7 +202,8 @@ impl FromToProto for mt::TableMeta { } } -impl FromToProto for mt::TableStatistics { +impl FromToProto for mt::TableStatistics { + type PB = pb::TableStatistics; fn from_pb(p: pb::TableStatistics) -> Result { check_ver(p.ver, p.min_compatible)?; @@ -224,7 +230,8 @@ impl FromToProto for mt::TableStatistics { } } -impl FromToProto for mt::TableIdList { +impl FromToProto for mt::TableIdList { + type PB = pb::TableIdList; fn from_pb(p: pb::TableIdList) -> Result { check_ver(p.ver, p.min_compatible)?; diff --git a/common/proto-conv/src/user_from_to_protobuf_impl.rs b/common/proto-conv/src/user_from_to_protobuf_impl.rs index 395372ba97278..47bb8c53bfa62 100644 --- a/common/proto-conv/src/user_from_to_protobuf_impl.rs +++ b/common/proto-conv/src/user_from_to_protobuf_impl.rs @@ -35,7 +35,8 @@ use crate::Incompatible; use crate::MIN_COMPATIBLE_VER; use crate::VER; -impl FromToProto for mt::AuthInfo { +impl FromToProto for mt::AuthInfo { + type PB = pb::AuthInfo; fn from_pb(p: pb::AuthInfo) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -78,7 +79,8 @@ impl FromToProto for mt::AuthInfo { } } -impl FromToProto for mt::UserOption { +impl FromToProto for mt::UserOption { + type PB = pb::UserOption; fn from_pb(p: pb::UserOption) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -101,7 +103,8 @@ impl FromToProto for mt::UserOption { } } -impl FromToProto for mt::UserQuota { +impl FromToProto for mt::UserQuota { + type PB = pb::UserQuota; fn from_pb(p: pb::UserQuota) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -124,7 +127,8 @@ impl FromToProto for mt::UserQuota { } } -impl FromToProto for mt::GrantObject { +impl FromToProto for mt::GrantObject { + type PB = pb::GrantObject; fn 
from_pb(p: pb::GrantObject) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -175,7 +179,8 @@ impl FromToProto for mt::GrantObject { } } -impl FromToProto for mt::GrantEntry { +impl FromToProto for mt::GrantEntry { + type PB = pb::GrantEntry; fn from_pb(p: pb::GrantEntry) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -204,7 +209,8 @@ impl FromToProto for mt::GrantEntry { } } -impl FromToProto for mt::UserGrantSet { +impl FromToProto for mt::UserGrantSet { + type PB = pb::UserGrantSet; fn from_pb(p: pb::UserGrantSet) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -240,7 +246,8 @@ impl FromToProto for mt::UserGrantSet { } } -impl FromToProto for mt::UserInfo { +impl FromToProto for mt::UserInfo { + type PB = pb::UserInfo; fn from_pb(p: pb::UserInfo) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -277,7 +284,8 @@ impl FromToProto for mt::UserInfo { } } -impl FromToProto for mt::UserIdentity { +impl FromToProto for mt::UserIdentity { + type PB = pb::UserIdentity; fn from_pb(p: pb::UserIdentity) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -298,7 +306,8 @@ impl FromToProto for mt::UserIdentity { } } -impl FromToProto for mt::StageFileFormatType { +impl FromToProto for mt::StageFileFormatType { + type PB = pb::user_stage_info::StageFileFormatType; fn from_pb(p: pb::user_stage_info::StageFileFormatType) -> Result where Self: Sized { match p { @@ -331,7 +340,8 @@ impl FromToProto for mt::StageFileForm } } -impl FromToProto for mt::StageFileCompression { +impl FromToProto for mt::StageFileCompression { + type PB = pb::user_stage_info::StageFileCompression; fn from_pb(p: pb::user_stage_info::StageFileCompression) -> Result where Self: Sized { match p { @@ -382,7 +392,8 @@ impl FromToProto for mt::StageFileCom } } -impl FromToProto for mt::StageType { +impl FromToProto for mt::StageType { + type PB = pb::user_stage_info::StageType; fn from_pb(p: pb::user_stage_info::StageType) -> Result where Self: Sized { match p { @@ -399,7 +410,8 @@ impl FromToProto for mt::StageType { } } -impl FromToProto for StorageParams { +impl FromToProto for StorageParams { + type PB = pb::user_stage_info::StageStorage; fn from_pb(p: pb::user_stage_info::StageStorage) -> Result where Self: Sized { match p.storage { @@ -428,7 +440,8 @@ impl FromToProto for StorageParams { } } -impl FromToProto for mt::StageParams { +impl FromToProto for mt::StageParams { + type PB = pb::user_stage_info::StageParams; fn from_pb(p: pb::user_stage_info::StageParams) -> Result where Self: Sized { Ok(mt::StageParams { @@ -445,7 +458,8 @@ impl FromToProto for mt::StageParams { } } -impl FromToProto for mt::FileFormatOptions { +impl FromToProto for mt::FileFormatOptions { + type PB = pb::user_stage_info::FileFormatOptions; fn from_pb(p: pb::user_stage_info::FileFormatOptions) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -486,7 +500,8 @@ impl FromToProto for mt::FileFormatOptio } } -impl FromToProto for mt::OnErrorMode { +impl FromToProto for mt::OnErrorMode { + type PB = pb::user_stage_info::OnErrorMode; fn from_pb(p: pb::user_stage_info::OnErrorMode) -> Result where Self: Sized { match p.mode { @@ -536,7 +551,8 @@ impl FromToProto for mt::OnErrorMode { } } -impl FromToProto for mt::CopyOptions { +impl FromToProto for mt::CopyOptions { + type PB = pb::user_stage_info::CopyOptions; fn from_pb(p: pb::user_stage_info::CopyOptions) -> Result where Self: Sized { let on_error = 
mt::OnErrorMode::from_pb(p.on_error.ok_or_else(|| Incompatible { @@ -563,7 +579,8 @@ impl FromToProto for mt::CopyOptions { } } -impl FromToProto for mt::UserStageInfo { +impl FromToProto for mt::UserStageInfo { + type PB = pb::UserStageInfo; fn from_pb(p: pb::UserStageInfo) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; @@ -617,7 +634,8 @@ impl FromToProto for mt::UserStageInfo { } } -impl FromToProto for mt::StageFile { +impl FromToProto for mt::StageFile { + type PB = pb::StageFile; fn from_pb(p: pb::StageFile) -> Result where Self: Sized { check_ver(p.ver, p.min_compatible)?; From 08ae12692583ce4b828a50230eb8870ab8793b20 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 19:28:34 +0800 Subject: [PATCH 14/59] refactor: factor out find_all_related_roles --- common/users/src/lib.rs | 1 + common/users/src/role_cache_mgr.rs | 28 ++----------------- common/users/src/role_util.rs | 45 ++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 26 deletions(-) create mode 100644 common/users/src/role_util.rs diff --git a/common/users/src/lib.rs b/common/users/src/lib.rs index 113690293b10a..35e32cceb1a87 100644 --- a/common/users/src/lib.rs +++ b/common/users/src/lib.rs @@ -22,6 +22,7 @@ mod user_stage; mod user_udf; pub mod role_cache_mgr; +mod role_util; pub use jwt::*; pub use role_cache_mgr::RoleCacheMgr; diff --git a/common/users/src/role_cache_mgr.rs b/common/users/src/role_cache_mgr.rs index 599c211874fc3..02f4281798ec1 100644 --- a/common/users/src/role_cache_mgr.rs +++ b/common/users/src/role_cache_mgr.rs @@ -26,6 +26,7 @@ use common_meta_types::RoleInfo; use parking_lot::RwLock; use tracing::warn; +use crate::role_util::find_all_related_roles; use crate::UserApiProvider; struct CachedRoles { @@ -86,6 +87,7 @@ impl RoleCacheMgr { cached.remove(tenant); } + // find_related_roles is called on validating an user's privileges. pub async fn find_related_roles( &self, tenant: &str, @@ -134,29 +136,3 @@ async fn load_roles_data(user_api: &Arc, tenant: &str) -> Resul cached_at: Instant::now(), }) } - -// An role can be granted with multiple roles, find all the related roles in a DFS manner -pub fn find_all_related_roles( - cache: &HashMap, - role_identities: &[String], -) -> Vec { - let mut visited: HashSet = HashSet::new(); - let mut result: Vec = vec![]; - let mut q: VecDeque = role_identities.iter().cloned().collect(); - while let Some(role_identity) = q.pop_front() { - if visited.contains(&role_identity) { - continue; - } - let cache_key = role_identity.to_string(); - visited.insert(role_identity); - let role = match cache.get(&cache_key) { - None => continue, - Some(role) => role, - }; - result.push(role.clone()); - for related_role in role.grants.roles() { - q.push_back(related_role); - } - } - result -} diff --git a/common/users/src/role_util.rs b/common/users/src/role_util.rs new file mode 100644 index 0000000000000..01ef0a5b0fec3 --- /dev/null +++ b/common/users/src/role_util.rs @@ -0,0 +1,45 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::collections::HashSet; +use std::collections::VecDeque; + +use common_meta_types::RoleInfo; + +// An role can be granted with multiple roles, find all the related roles in a DFS manner +pub fn find_all_related_roles( + cache: &HashMap, + role_identities: &[String], +) -> Vec { + let mut visited: HashSet = HashSet::new(); + let mut result: Vec = vec![]; + let mut q: VecDeque = role_identities.iter().cloned().collect(); + while let Some(role_identity) = q.pop_front() { + if visited.contains(&role_identity) { + continue; + } + let cache_key = role_identity.to_string(); + visited.insert(role_identity); + let role = match cache.get(&cache_key) { + None => continue, + Some(role) => role, + }; + result.push(role.clone()); + for related_role in role.grants.roles() { + q.push_back(related_role); + } + } + result +} From 34adfcd17f4f3b476c8354331dfe6d153fd53555 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 19:30:07 +0800 Subject: [PATCH 15/59] refactor: move grant/revoke_role_to_user to role_mgr --- common/users/src/role_mgr.rs | 27 +++++++++++++++++++++++++++ common/users/src/user_mgr.rs | 26 -------------------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/common/users/src/role_mgr.rs b/common/users/src/role_mgr.rs index ab335f26d9926..73f91ed6bc3a9 100644 --- a/common/users/src/role_mgr.rs +++ b/common/users/src/role_mgr.rs @@ -16,6 +16,7 @@ use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::GrantObject; use common_meta_types::RoleInfo; +use common_meta_types::UserIdentity; use common_meta_types::UserPrivilegeSet; use crate::UserApiProvider; @@ -106,6 +107,32 @@ impl UserApiProvider { .map_err(|e| e.add_message_back("(while revoke role privileges)")) } + pub async fn grant_role_to_user( + &self, + tenant: &str, + user: UserIdentity, + grant_role: String, + ) -> Result> { + let client = self.get_user_api_client(tenant)?; + client + .grant_role(user, grant_role.clone(), None) + .await + .map_err(|e| e.add_message_back("(while grant role to user)")) + } + + pub async fn revoke_role_from_user( + &self, + tenant: &str, + user: UserIdentity, + revoke_role: String, + ) -> Result> { + let client = self.get_user_api_client(tenant)?; + client + .revoke_role(user, revoke_role.clone(), None) + .await + .map_err(|e| e.add_message_back("(while revoke role from user)")) + } + pub async fn grant_role_to_role( &self, tenant: &str, diff --git a/common/users/src/user_mgr.rs b/common/users/src/user_mgr.rs index c631962ceeadd..2bb890be9e228 100644 --- a/common/users/src/user_mgr.rs +++ b/common/users/src/user_mgr.rs @@ -143,32 +143,6 @@ impl UserApiProvider { .map_err(|e| e.add_message_back("(while revoke user privileges)")) } - pub async fn grant_role_to_user( - &self, - tenant: &str, - user: UserIdentity, - grant_role: String, - ) -> Result> { - let client = self.get_user_api_client(tenant)?; - client - .grant_role(user, grant_role.clone(), None) - .await - .map_err(|e| e.add_message_back("(while grant role to user)")) - } - - pub async fn revoke_role_from_user( - &self, - tenant: &str, - user: UserIdentity, - revoke_role: String, - ) -> Result> { - let client = self.get_user_api_client(tenant)?; - client - .revoke_role(user, revoke_role.clone(), None) - .await - .map_err(|e| e.add_message_back("(while revoke role from user)")) - } - // Drop a user by name and hostname. 
pub async fn drop_user(&self, tenant: &str, user: UserIdentity, if_exists: bool) -> Result<()> { let client = self.get_user_api_client(tenant)?; From 2ed73f3d5fbaad17f04827231837cdc921c6f8a4 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 20:07:26 +0800 Subject: [PATCH 16/59] feat: add cycle detection on role --- common/exception/src/exception_code.rs | 1 + common/users/src/role_mgr.rs | 33 ++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/common/exception/src/exception_code.rs b/common/exception/src/exception_code.rs index ee3a0df371f65..e9f840f636e93 100644 --- a/common/exception/src/exception_code.rs +++ b/common/exception/src/exception_code.rs @@ -180,6 +180,7 @@ build_exceptions! { IllegalUserInfoFormat(2203), UnknownRole(2204), IllegalUserSettingFormat(2205), + InvalidRole(2206), // Meta api error codes. DatabaseAlreadyExists(2301), diff --git a/common/users/src/role_mgr.rs b/common/users/src/role_mgr.rs index 73f91ed6bc3a9..e1e224d8b6c8b 100644 --- a/common/users/src/role_mgr.rs +++ b/common/users/src/role_mgr.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; + use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::GrantObject; @@ -19,6 +21,7 @@ use common_meta_types::RoleInfo; use common_meta_types::UserIdentity; use common_meta_types::UserPrivilegeSet; +use crate::role_util::find_all_related_roles; use crate::UserApiProvider; impl UserApiProvider { @@ -133,15 +136,27 @@ impl UserApiProvider { .map_err(|e| e.add_message_back("(while revoke role from user)")) } + // the grant_role can not have cycle with target_role. pub async fn grant_role_to_role( &self, tenant: &str, - role: String, + target_role: String, grant_role: String, ) -> Result> { + let related_roles = self.find_related_roles(tenant, &vec![grant_role]).await?; + let have_cycle = related_roles + .into_iter() + .any(|r| r.identity() == target_role); + if have_cycle { + return Err(ErrorCode::InvalidRole(format!( + "there's cycle between {} and {}", + target_role, grant_role + ))); + } + let client = self.get_role_api_client(tenant)?; client - .grant_role(role, grant_role, None) + .grant_role(target_role, grant_role, None) .await .map_err(|e| e.add_message_back("(while grant role to role)")) } @@ -174,4 +189,18 @@ impl UserApiProvider { } } } + + fn find_related_roles( + &self, + tenant: &str, + role_identities: &[String], + ) -> Result> { + let tenant_roles_map = self + .get_roles(tenant) + .await? 
+ .into_iter() + .map(|r| (r.identity(), r)) + .collect::>(); + Ok(find_all_related_roles(&tenant_roles_map, role_identities)) + } } From a011e901938f84dc947804576362a8ba5fa6ff54 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 20:08:48 +0800 Subject: [PATCH 17/59] refactor: revert 34adfcd17 --- common/users/src/role_mgr.rs | 27 --------------------------- common/users/src/user_mgr.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/common/users/src/role_mgr.rs b/common/users/src/role_mgr.rs index e1e224d8b6c8b..dfd5ff965cecc 100644 --- a/common/users/src/role_mgr.rs +++ b/common/users/src/role_mgr.rs @@ -18,7 +18,6 @@ use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::GrantObject; use common_meta_types::RoleInfo; -use common_meta_types::UserIdentity; use common_meta_types::UserPrivilegeSet; use crate::role_util::find_all_related_roles; @@ -110,32 +109,6 @@ impl UserApiProvider { .map_err(|e| e.add_message_back("(while revoke role privileges)")) } - pub async fn grant_role_to_user( - &self, - tenant: &str, - user: UserIdentity, - grant_role: String, - ) -> Result> { - let client = self.get_user_api_client(tenant)?; - client - .grant_role(user, grant_role.clone(), None) - .await - .map_err(|e| e.add_message_back("(while grant role to user)")) - } - - pub async fn revoke_role_from_user( - &self, - tenant: &str, - user: UserIdentity, - revoke_role: String, - ) -> Result> { - let client = self.get_user_api_client(tenant)?; - client - .revoke_role(user, revoke_role.clone(), None) - .await - .map_err(|e| e.add_message_back("(while revoke role from user)")) - } - // the grant_role can not have cycle with target_role. pub async fn grant_role_to_role( &self, diff --git a/common/users/src/user_mgr.rs b/common/users/src/user_mgr.rs index 2bb890be9e228..c631962ceeadd 100644 --- a/common/users/src/user_mgr.rs +++ b/common/users/src/user_mgr.rs @@ -143,6 +143,32 @@ impl UserApiProvider { .map_err(|e| e.add_message_back("(while revoke user privileges)")) } + pub async fn grant_role_to_user( + &self, + tenant: &str, + user: UserIdentity, + grant_role: String, + ) -> Result> { + let client = self.get_user_api_client(tenant)?; + client + .grant_role(user, grant_role.clone(), None) + .await + .map_err(|e| e.add_message_back("(while grant role to user)")) + } + + pub async fn revoke_role_from_user( + &self, + tenant: &str, + user: UserIdentity, + revoke_role: String, + ) -> Result> { + let client = self.get_user_api_client(tenant)?; + client + .revoke_role(user, revoke_role.clone(), None) + .await + .map_err(|e| e.add_message_back("(while revoke role from user)")) + } + // Drop a user by name and hostname. 
pub async fn drop_user(&self, tenant: &str, user: UserIdentity, if_exists: bool) -> Result<()> { let client = self.get_user_api_client(tenant)?; From a24d03a8fb19ca5cdb22f5b521b47575987d217e Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 20:21:42 +0800 Subject: [PATCH 18/59] fix: cargo check --- common/users/src/lib.rs | 2 +- common/users/src/role_cache_mgr.rs | 2 -- common/users/src/role_mgr.rs | 8 +++++--- common/users/tests/it/role_cache_mgr.rs | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/common/users/src/lib.rs b/common/users/src/lib.rs index 35e32cceb1a87..4d04d1a5a416c 100644 --- a/common/users/src/lib.rs +++ b/common/users/src/lib.rs @@ -22,7 +22,7 @@ mod user_stage; mod user_udf; pub mod role_cache_mgr; -mod role_util; +pub mod role_util; pub use jwt::*; pub use role_cache_mgr::RoleCacheMgr; diff --git a/common/users/src/role_cache_mgr.rs b/common/users/src/role_cache_mgr.rs index 02f4281798ec1..5a6c1bc93b8cc 100644 --- a/common/users/src/role_cache_mgr.rs +++ b/common/users/src/role_cache_mgr.rs @@ -13,8 +13,6 @@ // limitations under the License. use std::collections::HashMap; -use std::collections::HashSet; -use std::collections::VecDeque; use std::sync::Arc; use std::time::Duration; use std::time::Instant; diff --git a/common/users/src/role_mgr.rs b/common/users/src/role_mgr.rs index dfd5ff965cecc..76e9e32513f65 100644 --- a/common/users/src/role_mgr.rs +++ b/common/users/src/role_mgr.rs @@ -116,14 +116,16 @@ impl UserApiProvider { target_role: String, grant_role: String, ) -> Result> { - let related_roles = self.find_related_roles(tenant, &vec![grant_role]).await?; + let related_roles = self + .find_related_roles(tenant, &[grant_role.clone()]) + .await?; let have_cycle = related_roles .into_iter() .any(|r| r.identity() == target_role); if have_cycle { return Err(ErrorCode::InvalidRole(format!( "there's cycle between {} and {}", - target_role, grant_role + &target_role, &grant_role ))); } @@ -163,7 +165,7 @@ impl UserApiProvider { } } - fn find_related_roles( + async fn find_related_roles( &self, tenant: &str, role_identities: &[String], diff --git a/common/users/tests/it/role_cache_mgr.rs b/common/users/tests/it/role_cache_mgr.rs index 403140bfa5573..fbe443d99a982 100644 --- a/common/users/tests/it/role_cache_mgr.rs +++ b/common/users/tests/it/role_cache_mgr.rs @@ -21,7 +21,7 @@ use common_grpc::RpcClientConf; use common_meta_types::GrantObject; use common_meta_types::RoleInfo; use common_meta_types::UserPrivilegeSet; -use common_users::role_cache_mgr::find_all_related_roles; +use common_users::role_util::find_all_related_roles; use common_users::RoleCacheMgr; use common_users::UserApiProvider; From 790743c02b692c012e37c4c2012074e84d5cfa7b Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 20:46:13 +0800 Subject: [PATCH 19/59] add sqlogictest --- tests/logictest/main.py | 0 .../base/05_ddl/05_0017_ddl_grant_role_v2 | 27 +++++++++++++++++++ 2 files changed, 27 insertions(+) mode change 100644 => 100755 tests/logictest/main.py diff --git a/tests/logictest/main.py b/tests/logictest/main.py old mode 100644 new mode 100755 diff --git a/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 b/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 index a88abc23d0c0c..f5808331007d9 100644 --- a/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 @@ -34,6 +34,24 @@ CREATE ROLE 'test-role'; statement ok GRANT ROLE 'test' TO ROLE 'test-role'; 
+statement ok +CREATE ROLE IF NOT EXISTS 'role1'; + +statement ok +CREATE ROLE IF NOT EXISTS 'role2'; + +statement ok +CREATE ROLE IF NOT EXISTS 'role3'; + +statement ok +GRANT ROLE 'role1' TO ROLE 'role2'; + +statement ok +GRANT ROLE 'role2' TO ROLE 'role3'; + +statement error 2206 +GRANT ROLE 'role3' TO ROLE 'role1'; + statement ok DROP ROLE 'test'; @@ -43,6 +61,15 @@ DROP ROLE 'test-role'; statement ok DROP USER 'test-user'; +statement ok +DROP USER 'role1'; + +statement ok +DROP USER 'role2'; + +statement ok +DROP USER 'role3'; + statement ok SET enable_planner_v2 = 0; From d14d9a77ef0865068e316c3ce45e44702655b174 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 20:53:54 +0800 Subject: [PATCH 20/59] test: change sqlogictest error hint --- tests/logictest/logictest.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/logictest/logictest.py b/tests/logictest/logictest.py index 1a33c10e46e50..5628b034da90f 100644 --- a/tests/logictest/logictest.py +++ b/tests/logictest/logictest.py @@ -413,16 +413,15 @@ def assert_execute_error(self, statement): actual = safe_execute(lambda: self.execute_error(statement.text), statement) if actual is None: - raise LogicError(message=f"{str(statement)}", - errorType="statement error get no error message", + raise LogicError(message=f"expected error {statement.s_type.expect_error}, but got ok on statement: {statement.text} " , + errorType="Error code mismatch", runner=self.kind) match = re.search(statement.s_type.expect_error, actual.msg) if match is None: raise LogicError( message= f"\n expected error regex is {statement.s_type.expect_error}\n actual found {actual}{str(statement)}", - errorType= - f"statement error get error message not equal to expected", + errorType="Error code mismatch", runner=self.kind) def run_sql_suite(self): From 965122fe71b4837363d76dc9483364182241a8ea Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 21:17:12 +0800 Subject: [PATCH 21/59] test: fix sqlogictest about 05_0017 --- common/users/src/role_mgr.rs | 8 +++----- .../suites/base/05_ddl/05_0017_ddl_grant_role_v2 | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/common/users/src/role_mgr.rs b/common/users/src/role_mgr.rs index 76e9e32513f65..ba714bf80469d 100644 --- a/common/users/src/role_mgr.rs +++ b/common/users/src/role_mgr.rs @@ -119,13 +119,11 @@ impl UserApiProvider { let related_roles = self .find_related_roles(tenant, &[grant_role.clone()]) .await?; - let have_cycle = related_roles - .into_iter() - .any(|r| r.identity() == target_role); + let have_cycle = related_roles.iter().any(|r| r.identity() == target_role); if have_cycle { return Err(ErrorCode::InvalidRole(format!( - "there's cycle between {} and {}", - &target_role, &grant_role + "{} contains {}, can not be grant to {}", + &grant_role, &target_role, &target_role ))); } diff --git a/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 b/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 index f5808331007d9..a1074d4e3f566 100644 --- a/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 +++ b/tests/logictest/suites/base/05_ddl/05_0017_ddl_grant_role_v2 @@ -10,6 +10,15 @@ DROP ROLE IF EXISTS 'test-role'; statement ok DROP USER IF EXISTS 'test-user'; +statement ok +DROP ROLE IF EXISTS 'role1'; + +statement ok +DROP ROLE IF EXISTS 'role2'; + +statement ok +DROP ROLE IF EXISTS 'role3'; + statement error 2204 GRANT ROLE 'test' TO 'test-user'; @@ -62,13 +71,13 @@ statement ok DROP USER 'test-user'; 
statement ok -DROP USER 'role1'; +DROP ROLE 'role1'; statement ok -DROP USER 'role2'; +DROP ROLE 'role2'; statement ok -DROP USER 'role3'; +DROP ROLE 'role3'; statement ok SET enable_planner_v2 = 0; From 79ec9279666dcc17e494dac11dc7a1e79926692a Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 21:30:51 +0800 Subject: [PATCH 22/59] chore: fmt python code --- tests/logictest/logictest.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/logictest/logictest.py b/tests/logictest/logictest.py index 5628b034da90f..f3eca6b009779 100644 --- a/tests/logictest/logictest.py +++ b/tests/logictest/logictest.py @@ -413,9 +413,11 @@ def assert_execute_error(self, statement): actual = safe_execute(lambda: self.execute_error(statement.text), statement) if actual is None: - raise LogicError(message=f"expected error {statement.s_type.expect_error}, but got ok on statement: {statement.text} " , - errorType="Error code mismatch", - runner=self.kind) + raise LogicError( + message= + f"expected error {statement.s_type.expect_error}, but got ok on statement: {statement.text} ", + errorType="Error code mismatch", + runner=self.kind) match = re.search(statement.s_type.expect_error, actual.msg) if match is None: raise LogicError( From 307fb8a9fd008c66c1638620de81d524bd6a0251 Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Tue, 9 Aug 2022 21:31:10 +0800 Subject: [PATCH 23/59] chore: make RoleInfo.identity returns reference --- common/management/src/role/role_mgr.rs | 4 ++-- common/meta/types/src/role_info.rs | 4 ++-- common/users/src/role_cache_mgr.rs | 2 +- common/users/src/role_mgr.rs | 2 +- common/users/tests/it/role_cache_mgr.rs | 8 +++++--- .../tests/it/interpreters/interpreter_role_grant.rs | 8 ++++++-- .../tests/it/interpreters/interpreter_role_revoke.rs | 12 +++++++++--- 7 files changed, 26 insertions(+), 14 deletions(-) diff --git a/common/management/src/role/role_mgr.rs b/common/management/src/role/role_mgr.rs index f3e128f974873..1457f469d2e26 100644 --- a/common/management/src/role/role_mgr.rs +++ b/common/management/src/role/role_mgr.rs @@ -57,7 +57,7 @@ impl RoleMgr { role_info: &RoleInfo, seq: Option, ) -> common_exception::Result { - let key = self.make_role_key(&role_info.identity()); + let key = self.make_role_key(role_info.identity()); let value = serde_json::to_vec(&role_info)?; let match_seq = match seq { @@ -92,7 +92,7 @@ impl RoleMgr { impl RoleApi for RoleMgr { async fn add_role(&self, role_info: RoleInfo) -> common_exception::Result { let match_seq = MatchSeq::Exact(0); - let key = self.make_role_key(&role_info.identity()); + let key = self.make_role_key(role_info.identity()); let value = serde_json::to_vec(&role_info)?; let kv_api = self.kv_api.clone(); diff --git a/common/meta/types/src/role_info.rs b/common/meta/types/src/role_info.rs index fd49198c94913..38bdc8b583f91 100644 --- a/common/meta/types/src/role_info.rs +++ b/common/meta/types/src/role_info.rs @@ -35,8 +35,8 @@ impl RoleInfo { } } - pub fn identity(&self) -> String { - self.name.clone() + pub fn identity(&self) -> &str { + &self.name } } diff --git a/common/users/src/role_cache_mgr.rs b/common/users/src/role_cache_mgr.rs index 5a6c1bc93b8cc..30e10e35c0eb9 100644 --- a/common/users/src/role_cache_mgr.rs +++ b/common/users/src/role_cache_mgr.rs @@ -127,7 +127,7 @@ async fn load_roles_data(user_api: &Arc, tenant: &str) -> Resul let roles = user_api.get_roles(tenant).await?; let roles_map = roles .into_iter() - .map(|r| (r.identity(), r)) + .map(|r| (r.identity().to_string(), r)) 
.collect::>(); Ok(CachedRoles { roles: roles_map, diff --git a/common/users/src/role_mgr.rs b/common/users/src/role_mgr.rs index ba714bf80469d..421558c78503b 100644 --- a/common/users/src/role_mgr.rs +++ b/common/users/src/role_mgr.rs @@ -172,7 +172,7 @@ impl UserApiProvider { .get_roles(tenant) .await? .into_iter() - .map(|r| (r.identity(), r)) + .map(|r| (r.identity().to_string(), r)) .collect::>(); Ok(find_all_related_roles(&tenant_roles_map, role_identities)) } diff --git a/common/users/tests/it/role_cache_mgr.rs b/common/users/tests/it/role_cache_mgr.rs index fbe443d99a982..65cb4071a6939 100644 --- a/common/users/tests/it/role_cache_mgr.rs +++ b/common/users/tests/it/role_cache_mgr.rs @@ -73,8 +73,10 @@ async fn test_find_all_related_roles() -> Result<()> { "role1", "role2", "role3", "role4", "role5", ]), ]; - let mut cached: HashMap = - roles.into_iter().map(|r| (r.identity(), r)).collect(); + let mut cached: HashMap = roles + .into_iter() + .map(|r| (r.identity().to_string(), r)) + .collect(); for (lhs, rhs) in role_grants { cached .get_mut(&lhs.to_string()) @@ -85,7 +87,7 @@ async fn test_find_all_related_roles() -> Result<()> { for (input, want) in tests { let got: HashSet<_> = find_all_related_roles(&cached, &input) .into_iter() - .map(|r| r.identity()) + .map(|r| r.identity().to_string()) .collect(); let want: HashSet<_> = want.iter().map(|s| s.to_string()).collect(); assert_eq!(got, want); diff --git a/query/tests/it/interpreters/interpreter_role_grant.rs b/query/tests/it/interpreters/interpreter_role_grant.rs index 15408fdedc9d6..19f08d2035d18 100644 --- a/query/tests/it/interpreters/interpreter_role_grant.rs +++ b/query/tests/it/interpreters/interpreter_role_grant.rs @@ -88,7 +88,9 @@ async fn test_grant_role_interpreter() -> Result<()> { // Grant role to normal role. { user_mgr.add_role(&tenant, test_role.clone(), false).await?; - let role_info = user_mgr.get_role(&tenant, test_role.identity()).await?; + let role_info = user_mgr + .get_role(&tenant, test_role.identity().into()) + .await?; assert_eq!(role_info.grants.roles().len(), 0); let query = "GRANT ROLE 'test' TO ROLE 'test_role'"; @@ -96,7 +98,9 @@ async fn test_grant_role_interpreter() -> Result<()> { let executor = InterpreterFactoryV2::get(ctx.clone(), &plan)?; let _ = executor.execute().await?; - let role_info = user_mgr.get_role(&tenant, test_role.identity()).await?; + let role_info = user_mgr + .get_role(&tenant, test_role.identity().into()) + .await?; let roles = role_info.grants.roles(); assert_eq!(roles.len(), 1); assert_eq!(roles[0], "test".to_string()); diff --git a/query/tests/it/interpreters/interpreter_role_revoke.rs b/query/tests/it/interpreters/interpreter_role_revoke.rs index 03dbfb6201597..d7cdda3b8cd0d 100644 --- a/query/tests/it/interpreters/interpreter_role_revoke.rs +++ b/query/tests/it/interpreters/interpreter_role_revoke.rs @@ -87,7 +87,9 @@ async fn test_revoke_role_interpreter() -> Result<()> { let mut test_role = RoleInfo::new("test_role"); test_role.grants.grant_role("test".to_string()); user_mgr.add_role(&tenant, test_role.clone(), false).await?; - let role_info = user_mgr.get_role(&tenant, test_role.identity()).await?; + let role_info = user_mgr + .get_role(&tenant, test_role.identity().to_string()) + .await?; assert_eq!(role_info.grants.roles().len(), 1); // Revoke role from normal role. 
@@ -97,7 +99,9 @@ async fn test_revoke_role_interpreter() -> Result<()> { let executor = InterpreterFactoryV2::get(ctx.clone(), &plan)?; let _ = executor.execute().await?; - let role_info = user_mgr.get_role(&tenant, test_role.identity()).await?; + let role_info = user_mgr + .get_role(&tenant, test_role.identity().to_string()) + .await?; let roles = role_info.grants.roles(); assert_eq!(roles.len(), 0); } @@ -109,7 +113,9 @@ async fn test_revoke_role_interpreter() -> Result<()> { let executor = InterpreterFactoryV2::get(ctx.clone(), &plan)?; let _ = executor.execute().await?; - let role_info = user_mgr.get_role(&tenant, test_role.identity()).await?; + let role_info = user_mgr + .get_role(&tenant, test_role.identity().to_string()) + .await?; let roles = role_info.grants.roles(); assert_eq!(roles.len(), 0); } From 4a2c9682e0237da0099c06b304d08fdb21bf1d28 Mon Sep 17 00:00:00 2001 From: lichuang Date: Tue, 9 Aug 2022 22:04:09 +0800 Subject: [PATCH 24/59] refactor by comment --- query/src/sql/planner/binder/ddl/share.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/query/src/sql/planner/binder/ddl/share.rs b/query/src/sql/planner/binder/ddl/share.rs index 9a7c75a0a2d7a..eb07d3ac7536d 100644 --- a/query/src/sql/planner/binder/ddl/share.rs +++ b/query/src/sql/planner/binder/ddl/share.rs @@ -121,11 +121,7 @@ impl<'a> Binder { share, if_exists: *if_exists, is_add: *is_add, - accounts: tenants - .iter() - .map(|v| v.to_string()) - .into_iter() - .collect_vec(), + accounts: tenants.iter().map(|v| v.to_string()).collect_vec(), }; Ok(Plan::AlterShareAccounts(Box::new(plan))) } From f4efcf8d5c9eed477e4210ac81e4325f63fcc3d4 Mon Sep 17 00:00:00 2001 From: andylokandy Date: Tue, 9 Aug 2022 23:17:09 +0800 Subject: [PATCH 25/59] feat(expr): implement constant folding --- Cargo.lock | 30 +- common/expression/Cargo.toml | 3 +- common/expression/src/chunk.rs | 8 + common/expression/src/display.rs | 109 ++-- common/expression/src/evaluator.rs | 568 +++++------------- common/expression/src/expression.rs | 5 +- common/expression/src/function.rs | 33 +- common/expression/src/property.rs | 165 ++--- common/expression/src/types.rs | 11 +- common/expression/src/types/array.rs | 6 +- common/expression/src/types/boolean.rs | 7 - common/expression/src/types/empty_array.rs | 2 - common/expression/src/types/generic.rs | 4 - common/expression/src/types/map.rs | 6 - common/expression/src/types/null.rs | 2 - common/expression/src/types/nullable.rs | 7 - common/expression/src/types/number.rs | 272 ++++----- common/expression/src/types/string.rs | 9 +- common/expression/src/values.rs | 142 +++-- common/expression/tests/it/main.rs | 73 ++- .../expression/tests/it/testdata/run-pass.txt | 178 +++--- common/functions-v2/src/scalars/control.rs | 8 +- common/functions-v2/src/scalars/string.rs | 8 +- common/functions-v2/tests/it/scalars/mod.rs | 32 +- .../tests/it/scalars/testdata/boolean.txt | 7 + .../tests/it/scalars/testdata/control.txt | 22 +- .../tests/it/scalars/testdata/string.txt | 187 +++--- 27 files changed, 881 insertions(+), 1023 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fbd45bbb27be2..1a1ea4cd6d49b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1163,12 +1163,13 @@ dependencies = [ "comfy-table", "common-arrow", "common-ast", + "educe", "enum-as-inner", "goldenfile", "itertools", "match-template", "num-traits", - "ordered-float 1.1.1", + "ordered-float 3.0.0", "serde", ] @@ -2706,6 +2707,18 @@ dependencies = [ "getrandom 0.2.7", ] +[[package]] +name = "educe" +version = "0.4.19" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c07b7cc9cd8c08d10db74fca3b20949b9b6199725c04a0cce6d543496098fcac" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.7.0" @@ -2784,6 +2797,20 @@ dependencies = [ "syn", ] +[[package]] +name = "enum-ordinalize" +version = "3.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2170fc0efee383079a8bdd05d6ea2a184d2a0f07a1c1dcabdb2fd5e9f24bc36c" +dependencies = [ + "num-bigint", + "num-traits", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] name = "enum_dispatch" version = "0.3.8" @@ -5126,6 +5153,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" dependencies = [ "num-traits", + "serde", ] [[package]] diff --git a/common/expression/Cargo.toml b/common/expression/Cargo.toml index 1c684b21c55c6..775c822938a06 100755 --- a/common/expression/Cargo.toml +++ b/common/expression/Cargo.toml @@ -18,11 +18,12 @@ common-arrow = { path = "../arrow" } # Crates.io dependencies chrono-tz = "0.6.1" comfy-table = "6" +educe = "0.4" enum-as-inner = "0.4" itertools = "0.10" match-template = "0.0.1" num-traits = "0.2" -ordered-float = "1.0" +ordered-float = { version = "3.0", features = ["serde"] } serde = "1.0" [dev-dependencies] diff --git a/common/expression/src/chunk.rs b/common/expression/src/chunk.rs index e8396e9dbe124..bd33f98c7e035 100644 --- a/common/expression/src/chunk.rs +++ b/common/expression/src/chunk.rs @@ -13,6 +13,7 @@ // limitations under the License. use crate::types::AnyType; +use crate::Domain; use crate::Value; /// Chunk is a lightweight container for a group of columns. 
@@ -63,4 +64,11 @@ impl Chunk { pub fn num_columns(&self) -> usize { self.columns.len() } + + pub fn domains(&self) -> Vec { + self.columns + .iter() + .map(|value| value.as_ref().domain()) + .collect() + } } diff --git a/common/expression/src/display.rs b/common/expression/src/display.rs index 82fe937e0c71d..310891e0cd760 100755 --- a/common/expression/src/display.rs +++ b/common/expression/src/display.rs @@ -28,18 +28,18 @@ use crate::function::Function; use crate::function::FunctionSignature; use crate::property::BooleanDomain; use crate::property::Domain; -use crate::property::FloatDomain; use crate::property::FunctionProperty; -use crate::property::IntDomain; use crate::property::NullableDomain; use crate::property::StringDomain; -use crate::property::UIntDomain; +use crate::types::number::Number; use crate::types::AnyType; use crate::types::DataType; use crate::types::ValueType; use crate::values::ScalarRef; use crate::values::Value; use crate::values::ValueRef; +use crate::with_number_type; +use crate::NumberDomain; impl Debug for Chunk { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { @@ -76,22 +76,51 @@ impl Display for Chunk { } } +impl<'a> Debug for ScalarRef<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + ScalarRef::Null => write!(f, "NULL"), + ScalarRef::EmptyArray => write!(f, "[]"), + ScalarRef::Int8(val) => write!(f, "{val}_i8"), + ScalarRef::Int16(val) => write!(f, "{val}_i16"), + ScalarRef::Int32(val) => write!(f, "{val}_i32"), + ScalarRef::Int64(val) => write!(f, "{val}_i64"), + ScalarRef::UInt8(val) => write!(f, "{val}_u8"), + ScalarRef::UInt16(val) => write!(f, "{val}_u16"), + ScalarRef::UInt32(val) => write!(f, "{val}_u32"), + ScalarRef::UInt64(val) => write!(f, "{val}_u64"), + ScalarRef::Float32(val) => write!(f, "{val:?}_f32"), + ScalarRef::Float64(val) => write!(f, "{val:?}_f64"), + ScalarRef::Boolean(val) => write!(f, "{val}"), + ScalarRef::String(s) => write!(f, "{:?}", String::from_utf8_lossy(s)), + ScalarRef::Array(col) => write!(f, "[{}]", col.iter().join(", ")), + ScalarRef::Tuple(fields) => { + write!( + f, + "({})", + fields.iter().map(ScalarRef::to_string).join(", ") + ) + } + } + } +} + impl<'a> Display for ScalarRef<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { ScalarRef::Null => write!(f, "NULL"), ScalarRef::EmptyArray => write!(f, "[]"), - ScalarRef::Int8(i) => write!(f, "{}", i), - ScalarRef::Int16(i) => write!(f, "{}", i), - ScalarRef::Int32(i) => write!(f, "{}", i), - ScalarRef::Int64(i) => write!(f, "{}", i), - ScalarRef::UInt8(i) => write!(f, "{}", i), - ScalarRef::UInt16(i) => write!(f, "{}", i), - ScalarRef::UInt32(i) => write!(f, "{}", i), - ScalarRef::UInt64(i) => write!(f, "{}", i), - ScalarRef::Float32(i) => write!(f, "{:?}", i), - ScalarRef::Float64(i) => write!(f, "{:?}", i), - ScalarRef::Boolean(b) => write!(f, "{}", b), + ScalarRef::Int8(val) => write!(f, "{val}"), + ScalarRef::Int16(val) => write!(f, "{val}"), + ScalarRef::Int32(val) => write!(f, "{val}"), + ScalarRef::Int64(val) => write!(f, "{val}"), + ScalarRef::UInt8(val) => write!(f, "{val}"), + ScalarRef::UInt16(val) => write!(f, "{val}"), + ScalarRef::UInt32(val) => write!(f, "{val}"), + ScalarRef::UInt64(val) => write!(f, "{val}"), + ScalarRef::Float32(val) => write!(f, "{val:?}"), + ScalarRef::Float64(val) => write!(f, "{val:?}"), + ScalarRef::Boolean(val) => write!(f, "{val}"), ScalarRef::String(s) => write!(f, "{:?}", String::from_utf8_lossy(s)), ScalarRef::Array(col) => write!(f, "[{}]", 
col.iter().join(", ")), ScalarRef::Tuple(fields) => { @@ -152,16 +181,16 @@ impl Display for Literal { match self { Literal::Null => write!(f, "NULL"), Literal::Boolean(val) => write!(f, "{val}"), - Literal::UInt8(val) => write!(f, "{val}_u8"), - Literal::UInt16(val) => write!(f, "{val}_u16"), - Literal::UInt32(val) => write!(f, "{val}_u32"), Literal::UInt64(val) => write!(f, "{val}_u64"), - Literal::Float32(val) => write!(f, "{val}_f32"), - Literal::Float64(val) => write!(f, "{val}_f64"), Literal::Int8(val) => write!(f, "{val}_i8"), Literal::Int16(val) => write!(f, "{val}_i16"), Literal::Int32(val) => write!(f, "{val}_i32"), Literal::Int64(val) => write!(f, "{val}_i64"), + Literal::UInt8(val) => write!(f, "{val}_u8"), + Literal::UInt16(val) => write!(f, "{val}_u16"), + Literal::UInt32(val) => write!(f, "{val}_u32"), + Literal::Float32(val) => write!(f, "{val}_f32"), + Literal::Float64(val) => write!(f, "{val}_f64"), Literal::String(val) => write!(f, "{:?}", String::from_utf8_lossy(val)), } } @@ -209,7 +238,7 @@ impl Display for DataType { impl Display for Expr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Expr::Constant { scalar, .. } => write!(f, "{}", scalar.as_ref()), + Expr::Constant { scalar, .. } => write!(f, "{:?}", scalar.as_ref()), Expr::ColumnRef { id, .. } => write!(f, "ColumnRef({id})"), Expr::Cast { expr, dest_type, .. @@ -259,24 +288,6 @@ impl Display for Expr { } } -impl Debug for Value { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Value::Scalar(s) => write!(f, "Scalar({:?})", s), - Value::Column(c) => write!(f, "Column({:?})", c), - } - } -} - -impl<'a, T: ValueType> Debug for ValueRef<'a, T> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ValueRef::Scalar(s) => write!(f, "Scalar({:?})", s), - ValueRef::Column(c) => write!(f, "Column({:?})", c), - } - } -} - impl Display for Value { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { @@ -368,19 +379,7 @@ impl Display for StringDomain { } } -impl Display for IntDomain { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{{{}..={}}}", self.min, self.max) - } -} - -impl Display for UIntDomain { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{{{}..={}}}", self.min, self.max) - } -} - -impl Display for FloatDomain { +impl Display for NumberDomain { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{{{:?}..={:?}}}", self.min, self.max) } @@ -388,10 +387,8 @@ impl Display for FloatDomain { impl Display for Domain { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - Domain::Int(domain) => write!(f, "{domain}"), - Domain::UInt(domain) => write!(f, "{domain}"), - Domain::Float(domain) => write!(f, "{domain}"), + with_number_type!(|TYPE| match self { + Domain::TYPE(domain) => write!(f, "{domain}"), Domain::Boolean(domain) => write!(f, "{domain}"), Domain::String(domain) => write!(f, "{domain}"), Domain::Nullable(domain) => write!(f, "{domain}"), @@ -408,6 +405,6 @@ impl Display for Domain { write!(f, ")") } Domain::Undefined => write!(f, "_"), - } + }) } } diff --git a/common/expression/src/evaluator.rs b/common/expression/src/evaluator.rs index 25ef4e0c6e6d7..1cbe2cb1a5eb9 100644 --- a/common/expression/src/evaluator.rs +++ b/common/expression/src/evaluator.rs @@ -15,20 +15,14 @@ use common_arrow::arrow::bitmap; use common_arrow::arrow::bitmap::MutableBitmap; use 
itertools::Itertools; -use num_traits::ToPrimitive; use crate::chunk::Chunk; use crate::error::Result; use crate::expression::Expr; use crate::expression::Span; use crate::function::FunctionContext; -use crate::property::BooleanDomain; use crate::property::Domain; -use crate::property::FloatDomain; -use crate::property::IntDomain; use crate::property::NullableDomain; -use crate::property::StringDomain; -use crate::property::UIntDomain; use crate::types::any::AnyType; use crate::types::array::ArrayColumn; use crate::types::nullable::NullableColumn; @@ -40,14 +34,14 @@ use crate::values::Scalar; use crate::values::Value; use crate::with_number_type; -pub struct Evaluator { - pub input_columns: Chunk, +pub struct Evaluator<'a> { + pub input_columns: &'a Chunk, pub context: FunctionContext, } -impl Evaluator { +impl<'a> Evaluator<'a> { pub fn run(&self, expr: &Expr) -> Result> { - match expr { + let result = match expr { Expr::Constant { scalar, .. } => Ok(Value::Scalar(scalar.clone())), Expr::ColumnRef { id, .. } => Ok(self.input_columns.columns()[*id].clone()), Expr::FunctionCall { @@ -110,7 +104,20 @@ impl Evaluator { ))), } } + }; + + #[cfg(debug_assertions)] + if result.is_err() { + assert_eq!( + ConstantFolder::new(&self.input_columns.domains()) + .fold(expr) + .1, + None, + "domain calculation should not return any domain for expressions that are possible to fail" + ); } + + result } pub fn run_cast_scalar( @@ -147,9 +154,9 @@ impl Evaluator { (scalar, dest_ty) => { // number types - with_number_type!(SRC_TYPE, match scalar { + with_number_type!(|SRC_TYPE| match scalar { Scalar::SRC_TYPE(value) => { - with_number_type!(DEST_TYPE, match dest_ty { + with_number_type!(|DEST_TYPE| match dest_ty { DataType::DEST_TYPE => { let src_info = DataType::SRC_TYPE.number_type_info().unwrap(); let dest_info = DataType::DEST_TYPE.number_type_info().unwrap(); @@ -246,9 +253,9 @@ impl Evaluator { (col, dest_ty) => { // number types - with_number_type!(SRC_TYPE, match &col { + with_number_type!(|SRC_TYPE| match &col { Column::SRC_TYPE(col) => { - with_number_type!(DEST_TYPE, match dest_ty { + with_number_type!(|DEST_TYPE| match dest_ty { DataType::DEST_TYPE => { let src_info = DataType::SRC_TYPE.number_type_info().unwrap(); let dest_info = DataType::DEST_TYPE.number_type_info().unwrap(); @@ -363,9 +370,9 @@ impl Evaluator { (col, dest_ty) => { // number types - with_number_type!(SRC_TYPE, match &col { + with_number_type!(|SRC_TYPE| match &col { Column::SRC_TYPE(col) => { - with_number_type!(DEST_TYPE, match dest_ty { + with_number_type!(|DEST_TYPE| match dest_ty { DataType::DEST_TYPE => { let src_info = DataType::SRC_TYPE.number_type_info().unwrap(); let dest_info = DataType::DEST_TYPE.number_type_info().unwrap(); @@ -411,107 +418,112 @@ impl Evaluator { } } -pub struct DomainCalculator { - input_domains: Vec, +pub struct ConstantFolder<'a> { + input_domains: &'a [Domain], } -impl DomainCalculator { - pub fn new(input_domains: Vec) -> Self { - DomainCalculator { input_domains } +impl<'a> ConstantFolder<'a> { + pub fn new(input_domains: &'a [Domain]) -> Self { + ConstantFolder { input_domains } } - pub fn calculate(&self, expr: &Expr) -> Result { + pub fn fold(&self, expr: &Expr) -> (Expr, Option) { match expr { - Expr::Constant { scalar, .. } => Ok(self.calculate_constant(scalar)), - Expr::ColumnRef { id, .. } => Ok(self.input_domains[*id].clone()), + Expr::Constant { scalar, .. 
} => (expr.clone(), Some(scalar.as_ref().domain())), + Expr::ColumnRef { span, id } => { + let domain = &self.input_domains[*id]; + let expr = domain + .as_singleton() + .map(|scalar| Expr::Constant { + span: span.clone(), + scalar, + }) + .unwrap_or_else(|| expr.clone()); + (expr, Some(domain.clone())) + } Expr::Cast { span, expr, dest_type, } => { - let domain = self.calculate(expr)?; - self.calculate_cast(span.clone(), &domain, dest_type) + let (inner_expr, inner_domain) = self.fold(expr); + let cast_domain = inner_domain.and_then(|inner_domain| { + self.calculate_cast(span.clone(), &inner_domain, dest_type) + }); + let cast_expr = cast_domain + .as_ref() + .and_then(Domain::as_singleton) + .map(|scalar| Expr::Constant { + span: span.clone(), + scalar, + }) + .unwrap_or_else(|| Expr::Cast { + span: span.clone(), + expr: Box::new(inner_expr), + dest_type: dest_type.clone(), + }); + (cast_expr, cast_domain) } Expr::TryCast { span, expr, dest_type, } => { - let domain = self.calculate(expr)?; - Ok(self.calculate_try_cast(span.clone(), &domain, dest_type)) + let (inner_expr, inner_domain) = self.fold(expr); + let try_cast_domain = inner_domain.map(|inner_domain| { + self.calculate_try_cast(span.clone(), &inner_domain, dest_type) + }); + let try_cast_expr = try_cast_domain + .as_ref() + .and_then(Domain::as_singleton) + .map(|scalar| Expr::Constant { + span: span.clone(), + scalar, + }) + .unwrap_or_else(|| Expr::TryCast { + span: span.clone(), + expr: Box::new(inner_expr), + dest_type: dest_type.clone(), + }); + (try_cast_expr, try_cast_domain) } Expr::FunctionCall { + span, + id, function, generics, args, - .. } => { - let args_domain = args - .iter() - .map(|arg| self.calculate(arg)) - .collect::>>()?; - Ok((function.calc_domain)(&args_domain, generics)) - } - } - } + let (mut args_expr, mut args_domain) = (Vec::new(), Vec::new()); - pub fn calculate_constant(&self, scalar: &Scalar) -> Domain { - match scalar { - Scalar::Null => Domain::Nullable(NullableDomain { - has_null: true, - value: None, - }), - Scalar::EmptyArray => Domain::Array(None), - Scalar::Int8(i) => Domain::Int(IntDomain { - min: *i as i64, - max: *i as i64, - }), - Scalar::Int16(i) => Domain::Int(IntDomain { - min: *i as i64, - max: *i as i64, - }), - Scalar::Int32(i) => Domain::Int(IntDomain { - min: *i as i64, - max: *i as i64, - }), - Scalar::Int64(i) => Domain::Int(IntDomain { min: *i, max: *i }), - Scalar::UInt8(i) => Domain::UInt(UIntDomain { - min: *i as u64, - max: *i as u64, - }), - Scalar::UInt16(i) => Domain::UInt(UIntDomain { - min: *i as u64, - max: *i as u64, - }), - Scalar::UInt32(i) => Domain::UInt(UIntDomain { - min: *i as u64, - max: *i as u64, - }), - Scalar::UInt64(i) => Domain::UInt(UIntDomain { min: *i, max: *i }), - Scalar::Float32(i) => Domain::Float(FloatDomain { - min: *i as f64, - max: *i as f64, - }), - Scalar::Float64(i) => Domain::Float(FloatDomain { min: *i, max: *i }), - Scalar::Boolean(true) => Domain::Boolean(BooleanDomain { - has_false: false, - has_true: true, - }), - Scalar::Boolean(false) => Domain::Boolean(BooleanDomain { - has_false: true, - has_true: false, - }), - Scalar::String(s) => Domain::String(StringDomain { - min: s.clone(), - max: Some(s.clone()), - }), - Scalar::Array(array) => Domain::Array(Some(Box::new(array.domain()))), - Scalar::Tuple(fields) => Domain::Tuple( - fields - .iter() - .map(|field| self.calculate_constant(field)) - .collect(), - ), + for arg in args { + let (expr, domain) = self.fold(arg); + if let Some(domain) = domain { + args_expr.push(expr); + 
args_domain.push(domain); + } else { + return (expr, None); + } + } + + let func_domain = (function.calc_domain)(&args_domain, generics); + let func_expr = func_domain + .as_ref() + .and_then(Domain::as_singleton) + .map(|scalar| Expr::Constant { + span: span.clone(), + scalar, + }) + .unwrap_or_else(|| Expr::FunctionCall { + span: span.clone(), + id: id.clone(), + function: function.clone(), + generics: generics.clone(), + args: args_expr, + }); + + (func_expr, func_domain) + } } } @@ -520,14 +532,14 @@ impl DomainCalculator { span: Span, domain: &Domain, dest_type: &DataType, - ) -> Result { + ) -> Option { match (domain, dest_type) { ( Domain::Nullable(NullableDomain { value: None, .. }), DataType::Null | DataType::Nullable(_), - ) => Ok(domain.clone()), + ) => Some(domain.clone()), (Domain::Array(None), DataType::EmptyArray | DataType::Array(_)) => { - Ok(Domain::Array(None)) + Some(Domain::Array(None)) } ( Domain::Nullable(NullableDomain { @@ -535,196 +547,52 @@ impl DomainCalculator { value: Some(value), }), DataType::Nullable(ty), - ) => Ok(Domain::Nullable(NullableDomain { + ) => Some(Domain::Nullable(NullableDomain { has_null: *has_null, value: Some(Box::new(self.calculate_cast(span, value, ty)?)), })), - (domain, DataType::Nullable(ty)) => Ok(Domain::Nullable(NullableDomain { + (domain, DataType::Nullable(ty)) => Some(Domain::Nullable(NullableDomain { has_null: false, value: Some(Box::new(self.calculate_cast(span, domain, ty)?)), })), - (Domain::Array(Some(domain)), DataType::Array(ty)) => Ok(Domain::Array(Some( + (Domain::Array(Some(domain)), DataType::Array(ty)) => Some(Domain::Array(Some( Box::new(self.calculate_cast(span, domain, ty)?), ))), - (Domain::Tuple(fields), DataType::Tuple(fields_ty)) => Ok(Domain::Tuple( + (Domain::Tuple(fields), DataType::Tuple(fields_ty)) => Some(Domain::Tuple( fields .iter() .zip(fields_ty) .map(|(field, ty)| self.calculate_cast(span.clone(), field, ty)) - .collect::>>()?, + .collect::>>()?, )), // identical types (Domain::Boolean(_), DataType::Boolean) | (Domain::String(_), DataType::String) => { - Ok(domain.clone()) + Some(domain.clone()) } - // number types - (Domain::UInt(UIntDomain { min, max }), DataType::UInt8) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).min(u8::MAX as u64), - max: (*max).min(u8::MAX as u64), - })) - } - (Domain::UInt(UIntDomain { min, max }), DataType::UInt16) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).min(u16::MAX as u64), - max: (*max).min(u16::MAX as u64), - })) - } - (Domain::UInt(UIntDomain { min, max }), DataType::UInt32) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).min(u32::MAX as u64), - max: (*max).min(u32::MAX as u64), - })) - } - (Domain::UInt(_), DataType::UInt64) => Ok(domain.clone()), - (Domain::UInt(UIntDomain { min, max }), DataType::Int8) => Ok(Domain::Int(IntDomain { - min: (*min).min(i8::MAX as u64) as i64, - max: (*max).min(i8::MAX as u64) as i64, - })), - (Domain::UInt(UIntDomain { min, max }), DataType::Int16) => { - Ok(Domain::Int(IntDomain { - min: (*min).min(i16::MAX as u64) as i64, - max: (*max).min(i16::MAX as u64) as i64, - })) - } - (Domain::UInt(UIntDomain { min, max }), DataType::Int32) => { - Ok(Domain::Int(IntDomain { - min: (*min).min(i32::MAX as u64) as i64, - max: (*max).min(i32::MAX as u64) as i64, - })) - } - (Domain::UInt(UIntDomain { min, max }), DataType::Int64) => { - Ok(Domain::Int(IntDomain { - min: (*min).min(i64::MAX as u64) as i64, - max: (*max).min(i64::MAX as u64) as i64, - })) - } - (Domain::UInt(UIntDomain { min, max }), DataType::Float32) => { - // 
Cast to f32 and then to f64 to round to the nearest f32 value. - Ok(Domain::Float(FloatDomain { - min: *min as f32 as f64, - max: *max as f32 as f64, - })) - } - (Domain::UInt(UIntDomain { min, max }), DataType::Float64) => { - Ok(Domain::Float(FloatDomain { - min: *min as f64, - max: *max as f64, - })) - } - - (Domain::Int(IntDomain { min, max }), DataType::UInt8) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).clamp(0, u8::MAX as i64) as u64, - max: (*max).clamp(0, u8::MAX as i64) as u64, - })) - } - (Domain::Int(IntDomain { min, max }), DataType::UInt16) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).clamp(0, u16::MAX as i64) as u64, - max: (*max).clamp(0, u16::MAX as i64) as u64, - })) - } - (Domain::Int(IntDomain { min, max }), DataType::UInt32) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).clamp(0, u32::MAX as i64) as u64, - max: (*max).clamp(0, u32::MAX as i64) as u64, - })) - } - (Domain::Int(IntDomain { min, max }), DataType::UInt64) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).max(0) as u64, - max: (*max).max(0) as u64, - })) - } - (Domain::Int(IntDomain { min, max }), DataType::Int8) => Ok(Domain::Int(IntDomain { - min: (*min).clamp(i8::MIN as i64, i8::MAX as i64), - max: (*max).clamp(i8::MIN as i64, i8::MAX as i64), - })), - (Domain::Int(IntDomain { min, max }), DataType::Int16) => Ok(Domain::Int(IntDomain { - min: (*min).clamp(i16::MIN as i64, i16::MAX as i64), - max: (*max).clamp(i16::MIN as i64, i16::MAX as i64), - })), - (Domain::Int(IntDomain { min, max }), DataType::Int32) => Ok(Domain::Int(IntDomain { - min: (*min).clamp(i32::MIN as i64, i32::MAX as i64), - max: (*max).clamp(i32::MIN as i64, i32::MAX as i64), - })), - (Domain::Int(_), DataType::Int64) => Ok(domain.clone()), - (Domain::Int(IntDomain { min, max }), DataType::Float32) => { - // Cast to f32 and then to f64 to round to the nearest f32 value. 
- Ok(Domain::Float(FloatDomain { - min: (*min) as f32 as f64, - max: (*max) as f32 as f64, - })) - } - (Domain::Int(IntDomain { min, max }), DataType::Float64) => { - Ok(Domain::Float(FloatDomain { - min: (*min) as f64, - max: (*max) as f64, - })) - } + (domain, dest_ty) => { + // number types + with_number_type!(|SRC_TYPE| match domain { + Domain::SRC_TYPE(domain) => { + with_number_type!(|DEST_TYPE| match dest_ty { + DataType::DEST_TYPE => { + let (domain, overflowing) = domain.overflow_cast(); + if overflowing { + return None; + } else { + return Some(Domain::DEST_TYPE(domain)); + } + } + _ => (), + }) + } + _ => (), + }); - (Domain::Float(FloatDomain { min, max }), DataType::UInt8) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).round().clamp(0.0, u8::MAX as f64) as u64, - max: (*max).round().clamp(0.0, u8::MAX as f64) as u64, - })) - } - (Domain::Float(FloatDomain { min, max }), DataType::UInt16) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).round().clamp(0.0, u16::MAX as f64) as u64, - max: (*max).round().clamp(0.0, u16::MAX as f64) as u64, - })) - } - (Domain::Float(FloatDomain { min, max }), DataType::UInt32) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).round().clamp(0.0, u32::MAX as f64) as u64, - max: (*max).round().clamp(0.0, u32::MAX as f64) as u64, - })) - } - (Domain::Float(FloatDomain { min, max }), DataType::UInt64) => { - Ok(Domain::UInt(UIntDomain { - min: (*min).round().clamp(0.0, u64::MAX as f64) as u64, - max: (*max).round().clamp(0.0, u64::MAX as f64) as u64, - })) - } - (Domain::Float(FloatDomain { min, max }), DataType::Int8) => { - Ok(Domain::Int(IntDomain { - min: (*min).round().clamp(i8::MIN as f64, i8::MAX as f64) as i64, - max: (*max).round().clamp(i8::MIN as f64, i8::MAX as f64) as i64, - })) - } - (Domain::Float(FloatDomain { min, max }), DataType::Int16) => { - Ok(Domain::Int(IntDomain { - min: (*min).round().clamp(i16::MIN as f64, i16::MAX as f64) as i64, - max: (*max).round().clamp(i16::MIN as f64, i16::MAX as f64) as i64, - })) - } - (Domain::Float(FloatDomain { min, max }), DataType::Int32) => { - Ok(Domain::Int(IntDomain { - min: (*min).round().clamp(i32::MIN as f64, i32::MAX as f64) as i64, - max: (*max).round().clamp(i32::MIN as f64, i32::MAX as f64) as i64, - })) - } - (Domain::Float(FloatDomain { min, max }), DataType::Int64) => { - Ok(Domain::Int(IntDomain { - min: (*min).round().clamp(i64::MIN as f64, i64::MAX as f64) as i64, - max: (*max).round().clamp(i64::MIN as f64, i64::MAX as f64) as i64, - })) - } - (Domain::Float(FloatDomain { min, max }), DataType::Float32) => { - Ok(Domain::Float(FloatDomain { - // Cast to f32 and back to f64 to round to the nearest f32 value. 
- min: (*min) as f32 as f64, - max: (*max) as f32 as f64, - })) + // failure cases + None } - (Domain::Float(_), DataType::Float64) => Ok(domain.clone()), - - // failure cases - (domain, dest_ty) => Err((span, (format!("unable to cast {domain} to {dest_ty}",)))), } } @@ -783,154 +651,30 @@ impl DomainCalculator { }) } - // numeric types - ( - Domain::UInt(_), - DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64, - ) => { - let new_domain = self.calculate_cast(span, domain, inner_type).unwrap(); - Domain::Nullable(NullableDomain { - has_null: *domain != new_domain, - value: Some(Box::new(new_domain)), - }) - } - ( - Domain::UInt(UIntDomain { min, max }), - DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64, - ) => { - let new_domain = self - .calculate_cast(span, domain, inner_type) - .unwrap() - .into_int() - .unwrap(); - let has_null = min.to_i64().filter(|min| *min == new_domain.min).is_none() - || max.to_i64().filter(|max| *max == new_domain.max).is_none(); - Domain::Nullable(NullableDomain { - has_null, - value: Some(Box::new(Domain::Int(new_domain))), - }) - } - (Domain::UInt(UIntDomain { min, max }), DataType::Float32 | DataType::Float64) => { - let new_domain = self - .calculate_cast(span, domain, inner_type) - .unwrap() - .into_float() - .unwrap(); - let has_null = (*min) - .to_f64() - .filter(|min| *min == new_domain.min) - .is_none() - || (*max) - .to_f64() - .filter(|max| *max == new_domain.max) - .is_none(); - Domain::Nullable(NullableDomain { - has_null, - value: Some(Box::new(Domain::Float(new_domain))), - }) - } - ( - Domain::Int(IntDomain { min, max }), - DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64, - ) => { - let new_domain = self - .calculate_cast(span, domain, inner_type) - .unwrap() - .into_u_int() - .unwrap(); - let has_null = min.to_u64().filter(|min| *min == new_domain.min).is_none() - || max.to_u64().filter(|max| *max == new_domain.max).is_none(); - Domain::Nullable(NullableDomain { - has_null, - value: Some(Box::new(Domain::UInt(new_domain))), - }) - } - ( - Domain::Int(_), - DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64, - ) => { - let new_domain = self.calculate_cast(span, domain, inner_type).unwrap(); - Domain::Nullable(NullableDomain { - has_null: *domain != new_domain, - value: Some(Box::new(new_domain)), - }) - } - (Domain::Int(IntDomain { min, max }), DataType::Float32 | DataType::Float64) => { - let new_domain = self - .calculate_cast(span, domain, inner_type) - .unwrap() - .into_float() - .unwrap(); - let has_null = (*min) - .to_f64() - .filter(|min| *min == new_domain.min) - .is_none() - || (*max) - .to_f64() - .filter(|max| *max == new_domain.max) - .is_none(); - Domain::Nullable(NullableDomain { - has_null, - value: Some(Box::new(Domain::Float(new_domain))), - }) - } - ( - Domain::Float(FloatDomain { min, max }), - DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64, - ) => { - let new_domain = self - .calculate_cast(span, domain, inner_type) - .unwrap() - .into_u_int() - .unwrap(); - let has_null = (*min) - .to_u64() - .filter(|min| *min == new_domain.min) - .is_none() - || (*max) - .to_u64() - .filter(|max| *max == new_domain.max) - .is_none(); - Domain::Nullable(NullableDomain { - has_null, - value: Some(Box::new(Domain::UInt(new_domain))), - }) - } - ( - Domain::Float(FloatDomain { min, max }), - DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64, - ) => { - let new_domain = self - .calculate_cast(span, domain, 
inner_type) - .unwrap() - .into_int() - .unwrap(); - let has_null = (*min) - .to_i64() - .filter(|min| *min == new_domain.min) - .is_none() - || (*max) - .to_i64() - .filter(|max| *max == new_domain.max) - .is_none(); - Domain::Nullable(NullableDomain { - has_null, - value: Some(Box::new(Domain::Int(new_domain))), - }) - } - (Domain::Float(_), DataType::Float32 | DataType::Float64) => { - let new_domain = self.calculate_cast(span, domain, inner_type).unwrap(); + (domain, dest_ty) => { + // number types + with_number_type!(|SRC_TYPE| match domain { + Domain::SRC_TYPE(domain) => { + with_number_type!(|DEST_TYPE| match dest_ty { + DataType::DEST_TYPE => { + let (domain, overflowing) = domain.overflow_cast(); + return Domain::Nullable(NullableDomain { + has_null: overflowing, + value: Some(Box::new(Domain::DEST_TYPE(domain))), + }); + } + _ => (), + }) + } + _ => (), + }); + + // failure cases Domain::Nullable(NullableDomain { - has_null: false, - value: Some(Box::new(new_domain)), + has_null: true, + value: None, }) } - - // failure cases - _ => Domain::Nullable(NullableDomain { - has_null: true, - value: None, - }), } } } diff --git a/common/expression/src/expression.rs b/common/expression/src/expression.rs index 7700d04e9e1ac..d4544c3e020b4 100644 --- a/common/expression/src/expression.rs +++ b/common/expression/src/expression.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use educe::Educe; use serde::Deserialize; use serde::Serialize; @@ -54,7 +55,8 @@ pub enum RawExpr { }, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Educe)] +#[educe(PartialEq)] pub enum Expr { Constant { span: Span, @@ -77,6 +79,7 @@ pub enum Expr { FunctionCall { span: Span, id: FunctionID, + #[educe(PartialEq(ignore))] function: Arc, generics: Vec, args: Vec, diff --git a/common/expression/src/function.rs b/common/expression/src/function.rs index 48cc0ed95960a..bf479bffabeaa 100755 --- a/common/expression/src/function.rs +++ b/common/expression/src/function.rs @@ -50,7 +50,7 @@ impl Default for FunctionContext { /// `FunctionID` is a unique identifier for a function. It's used to construct /// the exactly same function from the remote execution nodes. 
-#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum FunctionID { Builtin { name: String, @@ -67,7 +67,7 @@ pub enum FunctionID { pub struct Function { pub signature: FunctionSignature, #[allow(clippy::type_complexity)] - pub calc_domain: Box Domain>, + pub calc_domain: Box Option>, #[allow(clippy::type_complexity)] pub eval: Box], &GenericMap) -> Result, String>>, } @@ -236,7 +236,7 @@ impl FunctionRegistry { self.register_1_arg_core::( name, property.clone(), - |_| None, + |_| Some(()), |_, _| Ok(Value::Scalar(())), ); @@ -328,19 +328,19 @@ impl FunctionRegistry { self.register_2_arg_core::, NullType, NullType, _, _>( name, property.clone(), - |_, _| None, + |_, _| Some(()), |_, _, _| Ok(Value::Scalar(())), ); self.register_2_arg_core::, NullType, _, _>( name, property.clone(), - |_, _| None, + |_, _| Some(()), |_, _, _| Ok(Value::Scalar(())), ); self.register_2_arg_core::( name, property.clone(), - |_, _| None, + |_, _| Some(()), |_, _, _| Ok(Value::Scalar(())), ); @@ -411,31 +411,26 @@ impl FunctionRegistry { fn erase_calc_domain_generic_0_arg( func: impl Fn() -> Option, -) -> impl Fn(&[Domain], &GenericMap) -> Domain { - move |_args, generics| { - let domain = func().unwrap_or_else(|| O::full_domain(generics)); - O::upcast_domain(domain) - } +) -> impl Fn(&[Domain], &GenericMap) -> Option { + move |_args, _generics| func().map(O::upcast_domain) } fn erase_calc_domain_generic_1_arg( func: impl Fn(&I1::Domain) -> Option, -) -> impl Fn(&[Domain], &GenericMap) -> Domain { - move |args, generics| { +) -> impl Fn(&[Domain], &GenericMap) -> Option { + move |args, _generics| { let arg1 = I1::try_downcast_domain(&args[0]).unwrap(); - let domain = func(&arg1).unwrap_or_else(|| O::full_domain(generics)); - O::upcast_domain(domain) + func(&arg1).map(O::upcast_domain) } } fn erase_calc_domain_generic_2_arg( func: impl Fn(&I1::Domain, &I2::Domain) -> Option, -) -> impl Fn(&[Domain], &GenericMap) -> Domain { - move |args, generics| { +) -> impl Fn(&[Domain], &GenericMap) -> Option { + move |args, _generics| { let arg1 = I1::try_downcast_domain(&args[0]).unwrap(); let arg2 = I2::try_downcast_domain(&args[1]).unwrap(); - let domain = func(&arg1, &arg2).unwrap_or_else(|| O::full_domain(generics)); - O::upcast_domain(domain) + func(&arg1, &arg2).map(O::upcast_domain) } } diff --git a/common/expression/src/property.rs b/common/expression/src/property.rs index b79e11c6fffa1..4bbb6413a6eb8 100644 --- a/common/expression/src/property.rs +++ b/common/expression/src/property.rs @@ -14,14 +14,12 @@ use enum_as_inner::EnumAsInner; +use crate::types::number::overflow_cast; +use crate::types::number::Number; use crate::types::AnyType; -use crate::types::ArgType; -use crate::types::BooleanType; -use crate::types::DataType; -use crate::types::GenericMap; -use crate::types::NumberType; -use crate::types::StringType; use crate::types::ValueType; +use crate::with_number_type; +use crate::Scalar; #[derive(Debug, Clone, Default)] pub struct FunctionProperty { @@ -37,9 +35,16 @@ impl FunctionProperty { #[derive(Debug, Clone, PartialEq, EnumAsInner)] pub enum Domain { - Int(IntDomain), - UInt(UIntDomain), - Float(FloatDomain), + Int8(NumberDomain), + Int16(NumberDomain), + Int32(NumberDomain), + Int64(NumberDomain), + UInt8(NumberDomain), + UInt16(NumberDomain), + UInt32(NumberDomain), + UInt64(NumberDomain), + Float32(NumberDomain), + Float64(NumberDomain), Boolean(BooleanDomain), String(StringDomain), Nullable(NullableDomain), @@ -49,21 +54,9 @@ pub 
enum Domain { } #[derive(Debug, Clone, PartialEq, Eq)] -pub struct IntDomain { - pub min: i64, - pub max: i64, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct UIntDomain { - pub min: u64, - pub max: u64, -} - -#[derive(Debug, Clone, PartialEq)] -pub struct FloatDomain { - pub min: f64, - pub max: f64, +pub struct NumberDomain { + pub min: T::Storage, + pub max: T::Storage, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -85,64 +78,18 @@ pub struct NullableDomain { } impl Domain { - pub fn full(ty: &DataType, generics: &GenericMap) -> Self { - match ty { - DataType::Null => Domain::Nullable(NullableDomain { - has_null: true, - value: None, - }), - DataType::EmptyArray => Domain::Array(None), - DataType::Int8 => Domain::Int(NumberType::::full_domain(generics)), - DataType::Int16 => Domain::Int(NumberType::::full_domain(generics)), - DataType::Int32 => Domain::Int(NumberType::::full_domain(generics)), - DataType::Int64 => Domain::Int(NumberType::::full_domain(generics)), - DataType::UInt8 => Domain::UInt(NumberType::::full_domain(generics)), - DataType::UInt16 => Domain::UInt(NumberType::::full_domain(generics)), - DataType::UInt32 => Domain::UInt(NumberType::::full_domain(generics)), - DataType::UInt64 => Domain::UInt(NumberType::::full_domain(generics)), - DataType::Float32 => Domain::Float(NumberType::::full_domain(generics)), - DataType::Float64 => Domain::Float(NumberType::::full_domain(generics)), - DataType::Boolean => Domain::Boolean(BooleanType::full_domain(generics)), - DataType::String => Domain::String(StringType::full_domain(generics)), - DataType::Nullable(ty) => Domain::Nullable(NullableDomain { - has_null: true, - value: Some(Box::new(Domain::full(ty, generics))), - }), - DataType::Tuple(tys) => { - Domain::Tuple(tys.iter().map(|ty| Domain::full(ty, generics)).collect()) - } - DataType::Array(ty) => Domain::Array(Some(Box::new(Domain::full(ty, generics)))), - DataType::Map(_) => Domain::Undefined, - DataType::Generic(idx) => Domain::full(&generics[*idx], generics), - } - } - pub fn merge(&self, other: &Domain) -> Domain { match (self, other) { - (Domain::Int(self_int), Domain::Int(other_int)) => Domain::Int(IntDomain { - min: self_int.min.min(other_int.min), - max: self_int.max.max(other_int.max), - }), - (Domain::UInt(self_uint), Domain::UInt(other_uint)) => Domain::UInt(UIntDomain { - min: self_uint.min.min(other_uint.min), - max: self_uint.max.max(other_uint.max), + (Domain::Boolean(this), Domain::Boolean(other)) => Domain::Boolean(BooleanDomain { + has_false: this.has_false || other.has_false, + has_true: this.has_true || other.has_true, }), - (Domain::Float(self_uint), Domain::Float(other_uint)) => Domain::Float(FloatDomain { - min: self_uint.min.min(other_uint.min), - max: self_uint.max.max(other_uint.max), - }), - (Domain::Boolean(self_bool), Domain::Boolean(other_bool)) => { - Domain::Boolean(BooleanDomain { - has_false: self_bool.has_false || other_bool.has_false, - has_true: self_bool.has_true || other_bool.has_true, - }) - } - (Domain::String(self_str), Domain::String(other_str)) => Domain::String(StringDomain { - min: self_str.min.as_slice().min(&other_str.min).to_vec(), - max: self_str + (Domain::String(this), Domain::String(other)) => Domain::String(StringDomain { + min: this.min.as_slice().min(&other.min).to_vec(), + max: this .max .as_ref() - .zip(other_str.max.as_ref()) + .zip(other.max.as_ref()) .map(|(self_max, other_max)| self_max.max(other_max).to_vec()), }), ( @@ -210,7 +157,69 @@ impl Domain { .map(|(self_tup, other_tup)| self_tup.merge(other_tup)) 
.collect(), ), - (a, b) => unreachable!("unable to merge {:?} with {:?}", a, b), + (this, other) => { + with_number_type!(|TYPE| match (this, other) { + (Domain::TYPE(this), Domain::TYPE(other)) => Domain::TYPE(NumberDomain { + min: this.min.min(other.min), + max: this.max.max(other.max), + }), + _ => unreachable!("unable to merge {this:?} with {other:?}"), + }) + } } } + + pub fn as_singleton(&self) -> Option { + match self { + Domain::Int8(NumberDomain { min, max }) if min == max => Some(Scalar::Int8(*min)), + Domain::Int16(NumberDomain { min, max }) if min == max => Some(Scalar::Int16(*min)), + Domain::Int32(NumberDomain { min, max }) if min == max => Some(Scalar::Int32(*min)), + Domain::Int64(NumberDomain { min, max }) if min == max => Some(Scalar::Int64(*min)), + Domain::UInt8(NumberDomain { min, max }) if min == max => Some(Scalar::UInt8(*min)), + Domain::UInt16(NumberDomain { min, max }) if min == max => Some(Scalar::UInt16(*min)), + Domain::UInt32(NumberDomain { min, max }) if min == max => Some(Scalar::UInt32(*min)), + Domain::UInt64(NumberDomain { min, max }) if min == max => Some(Scalar::UInt64(*min)), + Domain::Float32(NumberDomain { min, max }) if min == max => Some(Scalar::Float32(*min)), + Domain::Float64(NumberDomain { min, max }) if min == max => Some(Scalar::Float64(*min)), + Domain::Boolean(BooleanDomain { + has_false: true, + has_true: false, + }) => Some(Scalar::Boolean(false)), + Domain::Boolean(BooleanDomain { + has_false: false, + has_true: true, + }) => Some(Scalar::Boolean(true)), + Domain::String(StringDomain { min, max }) if Some(min) == max.as_ref() => { + Some(Scalar::String(min.clone())) + } + Domain::Nullable(NullableDomain { + has_null: true, + value: None, + }) => Some(Scalar::Null), + Domain::Nullable(NullableDomain { + has_null: false, + value: Some(value), + }) => value.as_singleton(), + Domain::Array(None) => Some(Scalar::EmptyArray), + Domain::Tuple(fields) => Some(Scalar::Tuple( + fields + .iter() + .map(|field| field.as_singleton()) + .collect::>>()?, + )), + _ => None, + } + } +} + +impl NumberDomain { + /// Returns the saturating cast domain and a flag denoting whether overflow happened. 
+ pub fn overflow_cast(&self) -> (NumberDomain, bool) { + let (min, min_overflowing) = overflow_cast::(self.min); + let (max, max_overflowing) = overflow_cast::(self.max); + ( + NumberDomain { min, max }, + min_overflowing || max_overflowing, + ) + } } diff --git a/common/expression/src/types.rs b/common/expression/src/types.rs index 90b5e0436049a..d68aeba6e05ba 100755 --- a/common/expression/src/types.rs +++ b/common/expression/src/types.rs @@ -75,12 +75,12 @@ pub enum DataType { } pub trait ValueType: Debug + Clone + PartialEq + Sized + 'static { - type Scalar: Debug + Clone; - type ScalarRef<'a>: Debug + Clone; - type Column: Debug + Clone; + type Scalar: Debug + Clone + PartialEq; + type ScalarRef<'a>: Debug + Clone + PartialEq; + type Column: Debug + Clone + PartialEq; type Domain: Debug + Clone + PartialEq; type ColumnIterator<'a>: Iterator> + TrustedLen; - type ColumnBuilder: Clone; + type ColumnBuilder: Debug + Clone + PartialEq; fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar; fn to_scalar_ref<'a>(scalar: &'a Self::Scalar) -> Self::ScalarRef<'a>; @@ -108,7 +108,6 @@ pub trait ValueType: Debug + Clone + PartialEq + Sized + 'static { pub trait ArgType: ValueType { fn data_type() -> DataType; - fn full_domain(generics: &GenericMap) -> Self::Domain; fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder; fn column_from_iter( iter: impl Iterator, @@ -338,7 +337,7 @@ const fn max_bit_with(lhs: u8, rhs: u8) -> u8 { #[macro_export] macro_rules! with_number_type { - ($t:tt, $($tail:tt)*) => {{ + ( | $t:tt | $($tail:tt)* ) => {{ match_template::match_template! { $t = [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64], $($tail)* diff --git a/common/expression/src/types/array.rs b/common/expression/src/types/array.rs index 07a4dbe50065a..e4e1c4a59142c 100755 --- a/common/expression/src/types/array.rs +++ b/common/expression/src/types/array.rs @@ -128,10 +128,6 @@ impl ArgType for ArrayType { DataType::Array(Box::new(T::data_type())) } - fn full_domain(generics: &GenericMap) -> Self::Domain { - T::full_domain(generics) - } - fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder { ArrayColumnBuilder::with_capacity(capacity, 0, generics) } @@ -211,7 +207,7 @@ impl<'a, T: ValueType> Iterator for ArrayIterator<'a, T> { unsafe impl<'a, T: ValueType> TrustedLen for ArrayIterator<'a, T> {} -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub struct ArrayColumnBuilder { pub builder: T::ColumnBuilder, pub offsets: Vec, diff --git a/common/expression/src/types/boolean.rs b/common/expression/src/types/boolean.rs index 27407db583f96..e773daa12ef76 100644 --- a/common/expression/src/types/boolean.rs +++ b/common/expression/src/types/boolean.rs @@ -128,13 +128,6 @@ impl ArgType for BooleanType { DataType::Boolean } - fn full_domain(_: &GenericMap) -> Self::Domain { - BooleanDomain { - has_false: true, - has_true: true, - } - } - fn create_builder(capacity: usize, _: &GenericMap) -> Self::ColumnBuilder { MutableBitmap::with_capacity(capacity) } diff --git a/common/expression/src/types/empty_array.rs b/common/expression/src/types/empty_array.rs index b38d71d5df0f3..7fe802a97d58e 100644 --- a/common/expression/src/types/empty_array.rs +++ b/common/expression/src/types/empty_array.rs @@ -126,8 +126,6 @@ impl ArgType for EmptyArrayType { DataType::EmptyArray } - fn full_domain(_: &GenericMap) -> Self::Domain {} - fn create_builder(_capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder { 0 } 
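The `overflow_cast` pieces added above (the `NumberDomain::overflow_cast` method in property.rs and the free helper in number.rs) drive both constant folding of CAST and the TRY_CAST domain calculation: the source bound is clamped into the destination type's range, and a flag reports whether clamping changed it. Below is a minimal standalone sketch of that clamp-and-flag rule, written for a concrete u16 -> u8 cast with an illustrative helper name; it is not part of the patch, which generalizes the same rule over the `Number` trait and its `OrderedStorage` associated type.

fn overflow_cast_u16_to_u8(src: u16) -> (u8, bool) {
    // Clamp into the destination range, then report whether clamping changed the value.
    let clamped = src.min(u8::MAX as u16);
    (clamped as u8, clamped != src)
}

fn main() {
    assert_eq!(overflow_cast_u16_to_u8(200), (200, false)); // fits, no overflow
    assert_eq!(overflow_cast_u16_to_u8(300), (255, true));  // clamped, overflow flagged
    // In the patch, calculate_cast returns None when the flag is set (the cast may
    // fail at runtime and so cannot be folded), while calculate_try_cast instead
    // yields a nullable domain with has_null set to the overflow flag.
}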
diff --git a/common/expression/src/types/generic.rs b/common/expression/src/types/generic.rs index f62d7af8378e5..c066cf8fbb771 100755 --- a/common/expression/src/types/generic.rs +++ b/common/expression/src/types/generic.rs @@ -118,10 +118,6 @@ impl ArgType for GenericType { DataType::Generic(INDEX) } - fn full_domain(generics: &GenericMap) -> Self::Domain { - Domain::full(&generics[INDEX], generics) - } - fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder { ColumnBuilder::with_capacity(&generics[INDEX], capacity) } diff --git a/common/expression/src/types/map.rs b/common/expression/src/types/map.rs index 559a788d2476d..1870dbc573753 100755 --- a/common/expression/src/types/map.rs +++ b/common/expression/src/types/map.rs @@ -140,8 +140,6 @@ impl ArgType for KvPair { DataType::Tuple(vec![K::data_type(), V::data_type()]) } - fn full_domain(_: &GenericMap) -> Self::Domain {} - fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder { KvColumnBuilder::with_capacity(capacity, generics) } @@ -351,10 +349,6 @@ impl ArgType for MapType { DataType::Map(Box::new(T::data_type())) } - fn full_domain(generics: &GenericMap) -> Self::Domain { - as ArgType>::full_domain(generics) - } - fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder { as ArgType>::create_builder(capacity, generics) } diff --git a/common/expression/src/types/null.rs b/common/expression/src/types/null.rs index 7d359236c984b..b079d1d5bbaf1 100644 --- a/common/expression/src/types/null.rs +++ b/common/expression/src/types/null.rs @@ -133,8 +133,6 @@ impl ArgType for NullType { DataType::Null } - fn full_domain(_: &GenericMap) -> Self::Domain {} - fn create_builder(_capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder { 0 } diff --git a/common/expression/src/types/nullable.rs b/common/expression/src/types/nullable.rs index 10491b6486c10..2b816b7876ba9 100755 --- a/common/expression/src/types/nullable.rs +++ b/common/expression/src/types/nullable.rs @@ -142,13 +142,6 @@ impl ArgType for NullableType { DataType::Nullable(Box::new(T::data_type())) } - fn full_domain(generics: &GenericMap) -> Self::Domain { - NullableDomain { - has_null: true, - value: Some(Box::new(T::full_domain(generics))), - } - } - fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder { NullableColumnBuilder::with_capacity(capacity, generics) } diff --git a/common/expression/src/types/number.rs b/common/expression/src/types/number.rs index cc75579ba16e6..f12c238517786 100644 --- a/common/expression/src/types/number.rs +++ b/common/expression/src/types/number.rs @@ -18,11 +18,10 @@ use std::ops::Range; use common_arrow::arrow::buffer::Buffer; use common_arrow::arrow::types::NativeType; +use num_traits::NumCast; +use ordered_float::OrderedFloat; use crate::property::Domain; -use crate::property::FloatDomain; -use crate::property::IntDomain; -use crate::property::UIntDomain; use crate::types::ArgType; use crate::types::DataType; use crate::types::GenericMap; @@ -30,32 +29,35 @@ use crate::types::ValueType; use crate::util::buffer_into_mut; use crate::values::Column; use crate::values::Scalar; +use crate::NumberDomain; use crate::ScalarRef; pub trait Number: Debug + Clone + PartialEq + 'static { - type Storage: NativeType; - type Domain: Debug + Clone + PartialEq; + type Storage: NumCast + Copy + From + NativeType; + type OrderedStorage: NumCast + Copy + From + Ord; + + const MIN: Self::Storage; + const MAX: Self::Storage; fn data_type() -> DataType; fn 
try_downcast_scalar(scalar: &ScalarRef) -> Option; fn try_downcast_column(col: &Column) -> Option>; - fn try_downcast_domain(domain: &Domain) -> Option; + fn try_downcast_domain(domain: &Domain) -> Option>; fn upcast_scalar(scalar: Self::Storage) -> Scalar; fn upcast_column(col: Buffer) -> Column; - fn upcast_domain(domain: Self::Domain) -> Domain; - fn full_domain() -> Self::Domain; + fn upcast_domain(domain: NumberDomain) -> Domain; } #[derive(Debug, Clone, PartialEq, Eq)] pub struct NumberType(PhantomData); -impl ValueType for NumberType { - type Scalar = Int::Storage; - type ScalarRef<'a> = Int::Storage; - type Column = Buffer; - type Domain = Int::Domain; - type ColumnIterator<'a> = std::iter::Cloned>; - type ColumnBuilder = Vec; +impl ValueType for NumberType { + type Scalar = Num::Storage; + type ScalarRef<'a> = Num::Storage; + type Column = Buffer; + type Domain = NumberDomain; + type ColumnIterator<'a> = std::iter::Cloned>; + type ColumnBuilder = Vec; fn to_owned_scalar<'a>(scalar: Self::ScalarRef<'a>) -> Self::Scalar { scalar @@ -66,27 +68,27 @@ impl ValueType for NumberType { } fn try_downcast_scalar<'a>(scalar: &'a ScalarRef) -> Option> { - Int::try_downcast_scalar(scalar) + Num::try_downcast_scalar(scalar) } fn try_downcast_column<'a>(col: &'a Column) -> Option { - Int::try_downcast_column(col) + Num::try_downcast_column(col) } - fn try_downcast_domain(domain: &Domain) -> Option { - Int::try_downcast_domain(domain) + fn try_downcast_domain(domain: &Domain) -> Option> { + Num::try_downcast_domain(domain) } fn upcast_scalar(scalar: Self::Scalar) -> Scalar { - Int::upcast_scalar(scalar) + Num::upcast_scalar(scalar) } fn upcast_column(col: Self::Column) -> Column { - Int::upcast_column(col) + Num::upcast_column(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Int::upcast_domain(domain) + fn upcast_domain(domain: NumberDomain) -> Domain { + Num::upcast_domain(domain) } fn column_len<'a>(col: &'a Self::Column) -> usize { @@ -118,7 +120,7 @@ impl ValueType for NumberType { } fn push_default(builder: &mut Self::ColumnBuilder) { - builder.push(Int::Storage::default()); + builder.push(Num::Storage::default()); } fn append_builder(builder: &mut Self::ColumnBuilder, other_builder: &Self::ColumnBuilder) { @@ -135,13 +137,9 @@ impl ValueType for NumberType { } } -impl ArgType for NumberType { +impl ArgType for NumberType { fn data_type() -> DataType { - Int::data_type() - } - - fn full_domain(_: &GenericMap) -> Self::Domain { - Int::full_domain() + Num::data_type() } fn create_builder(capacity: usize, _generics: &GenericMap) -> Self::ColumnBuilder { @@ -155,7 +153,10 @@ impl ArgType for NumberType { impl Number for u8 { type Storage = u8; - type Domain = UIntDomain; + type OrderedStorage = u8; + + const MIN: Self::Storage = u8::MIN; + const MAX: Self::Storage = u8::MAX; fn data_type() -> DataType { DataType::UInt8 @@ -169,8 +170,8 @@ impl Number for u8 { col.as_u_int8().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_u_int().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_u_int8().cloned() } fn upcast_scalar(scalar: Self::Storage) -> Scalar { @@ -181,21 +182,17 @@ impl Number for u8 { Column::UInt8(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::UInt(domain) - } - - fn full_domain() -> Self::Domain { - UIntDomain { - min: 0, - max: u8::MAX as u64, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::UInt8(domain) } } impl Number for u16 { type Storage = u16; - type Domain = UIntDomain; 
+ type OrderedStorage = u16; + + const MIN: Self::Storage = u16::MIN; + const MAX: Self::Storage = u16::MAX; fn data_type() -> DataType { DataType::UInt16 @@ -209,8 +206,8 @@ impl Number for u16 { col.as_u_int16().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_u_int().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_u_int16().cloned() } fn upcast_scalar(scalar: Self::Storage) -> Scalar { @@ -221,21 +218,17 @@ impl Number for u16 { Column::UInt16(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::UInt(domain) - } - - fn full_domain() -> Self::Domain { - UIntDomain { - min: 0, - max: u16::MAX as u64, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::UInt16(domain) } } impl Number for u32 { type Storage = u32; - type Domain = UIntDomain; + type OrderedStorage = u32; + + const MIN: Self::Storage = u32::MIN; + const MAX: Self::Storage = u32::MAX; fn data_type() -> DataType { DataType::UInt32 @@ -249,8 +242,8 @@ impl Number for u32 { col.as_u_int32().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_u_int().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_u_int32().cloned() } fn upcast_scalar(scalar: Self::Storage) -> Scalar { @@ -261,21 +254,17 @@ impl Number for u32 { Column::UInt32(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::UInt(domain) - } - - fn full_domain() -> Self::Domain { - UIntDomain { - min: 0, - max: u32::MAX as u64, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::UInt32(domain) } } impl Number for u64 { type Storage = u64; - type Domain = UIntDomain; + type OrderedStorage = u64; + + const MIN: Self::Storage = u64::MIN; + const MAX: Self::Storage = u64::MAX; fn data_type() -> DataType { DataType::UInt64 @@ -289,9 +278,10 @@ impl Number for u64 { col.as_u_int64().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_u_int().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_u_int64().cloned() } + fn upcast_scalar(scalar: Self::Storage) -> Scalar { Scalar::UInt64(scalar) } @@ -300,20 +290,17 @@ impl Number for u64 { Column::UInt64(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::UInt(domain) - } - fn full_domain() -> Self::Domain { - UIntDomain { - min: 0, - max: u64::MAX, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::UInt64(domain) } } impl Number for i8 { type Storage = i8; - type Domain = IntDomain; + type OrderedStorage = i8; + + const MIN: Self::Storage = i8::MIN; + const MAX: Self::Storage = i8::MAX; fn data_type() -> DataType { DataType::Int8 @@ -327,8 +314,8 @@ impl Number for i8 { col.as_int8().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_int().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_int8().cloned() } fn upcast_scalar(scalar: Self::Storage) -> Scalar { @@ -339,21 +326,17 @@ impl Number for i8 { Column::Int8(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::Int(domain) - } - - fn full_domain() -> Self::Domain { - IntDomain { - min: i8::MIN as i64, - max: i8::MAX as i64, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::Int8(domain) } } impl Number for i16 { type Storage = i16; - type Domain = IntDomain; + type OrderedStorage = i16; + + const MIN: Self::Storage = i16::MIN; + const MAX: Self::Storage = i16::MAX; fn data_type() -> DataType { DataType::Int16 @@ -367,8 +350,8 @@ impl Number for i16 { 
col.as_int16().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_int().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_int16().cloned() } fn upcast_scalar(scalar: Self::Storage) -> Scalar { @@ -379,21 +362,17 @@ impl Number for i16 { Column::Int16(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::Int(domain) - } - - fn full_domain() -> Self::Domain { - IntDomain { - min: i16::MIN as i64, - max: i16::MAX as i64, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::Int16(domain) } } impl Number for i32 { type Storage = i32; - type Domain = IntDomain; + type OrderedStorage = i32; + + const MIN: Self::Storage = i32::MIN; + const MAX: Self::Storage = i32::MAX; fn data_type() -> DataType { DataType::Int32 @@ -407,8 +386,8 @@ impl Number for i32 { col.as_int32().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_int().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_int32().cloned() } fn upcast_scalar(scalar: Self::Storage) -> Scalar { @@ -419,20 +398,17 @@ impl Number for i32 { Column::Int32(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::Int(domain) - } - - fn full_domain() -> Self::Domain { - IntDomain { - min: i32::MIN as i64, - max: i32::MAX as i64, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::Int32(domain) } } + impl Number for i64 { type Storage = i64; - type Domain = IntDomain; + type OrderedStorage = i64; + + const MIN: Self::Storage = i64::MIN; + const MAX: Self::Storage = i64::MAX; fn data_type() -> DataType { DataType::Int64 @@ -446,8 +422,8 @@ impl Number for i64 { col.as_int64().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_int().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_int64().cloned() } fn upcast_scalar(scalar: Self::Storage) -> Scalar { @@ -458,21 +434,17 @@ impl Number for i64 { Column::Int64(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::Int(domain) - } - - fn full_domain() -> Self::Domain { - IntDomain { - min: i64::MIN, - max: i64::MAX, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::Int64(domain) } } impl Number for f32 { type Storage = f32; - type Domain = FloatDomain; + type OrderedStorage = OrderedFloat; + + const MIN: Self::Storage = f32::NEG_INFINITY; + const MAX: Self::Storage = f32::INFINITY; fn data_type() -> DataType { DataType::Float32 @@ -486,8 +458,8 @@ impl Number for f32 { col.as_float32().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_float().cloned() + fn try_downcast_domain(domain: &Domain) -> Option> { + domain.as_float32().cloned() } fn upcast_scalar(scalar: Self::Storage) -> Scalar { @@ -498,21 +470,17 @@ impl Number for f32 { Column::Float32(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::Float(domain) - } - - fn full_domain() -> Self::Domain { - FloatDomain { - min: f32::NEG_INFINITY as f64, - max: f32::INFINITY as f64, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::Float32(domain) } } impl Number for f64 { type Storage = f64; - type Domain = FloatDomain; + type OrderedStorage = OrderedFloat; + + const MIN: Self::Storage = f64::NEG_INFINITY; + const MAX: Self::Storage = f64::INFINITY; fn data_type() -> DataType { DataType::Float64 @@ -526,9 +494,10 @@ impl Number for f64 { col.as_float64().cloned() } - fn try_downcast_domain(domain: &Domain) -> Option { - domain.as_float().cloned() + fn 
try_downcast_domain(domain: &Domain) -> Option> { + domain.as_float64().cloned() } + fn upcast_scalar(scalar: Self::Storage) -> Scalar { Scalar::Float64(scalar) } @@ -537,13 +506,20 @@ impl Number for f64 { Column::Float64(col) } - fn upcast_domain(domain: Self::Domain) -> Domain { - Domain::Float(domain) - } - fn full_domain() -> Self::Domain { - FloatDomain { - min: f64::NEG_INFINITY, - max: f64::INFINITY, - } + fn upcast_domain(domain: NumberDomain) -> Domain { + Domain::Float64(domain) } } + +pub fn overflow_cast(src: T::Storage) -> (U::Storage, bool) { + let src: T::OrderedStorage = src.into(); + let dest_min: T::OrderedStorage = num_traits::cast(U::MIN).unwrap_or(T::MIN).into(); + let dest_max: T::OrderedStorage = num_traits::cast(U::MAX).unwrap_or(T::MAX).into(); + let src_clamp: T::OrderedStorage = src.clamp(dest_min, dest_max); + let overflowing = src != src_clamp; + // The number must be within the range that `U` can represent after clamping, therefore + // it's safe to unwrap. + let dest: U::OrderedStorage = num_traits::cast(src_clamp).unwrap(); + + (dest.into(), overflowing) +} diff --git a/common/expression/src/types/string.rs b/common/expression/src/types/string.rs index 9d26e59dda034..074b0e3afa101 100644 --- a/common/expression/src/types/string.rs +++ b/common/expression/src/types/string.rs @@ -123,13 +123,6 @@ impl ArgType for StringType { DataType::String } - fn full_domain(_: &GenericMap) -> Self::Domain { - StringDomain { - min: vec![], - max: None, - } - } - fn create_builder(capacity: usize, _: &GenericMap) -> Self::ColumnBuilder { StringColumnBuilder::with_capacity(capacity, 0) } @@ -194,7 +187,7 @@ impl<'a> Iterator for StringIterator<'a> { unsafe impl<'a> TrustedLen for StringIterator<'a> {} -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct StringColumnBuilder { pub data: Vec, pub offsets: Vec, diff --git a/common/expression/src/values.rs b/common/expression/src/values.rs index 5735566ee0c09..30fc48c879a04 100755 --- a/common/expression/src/values.rs +++ b/common/expression/src/values.rs @@ -20,17 +20,14 @@ use common_arrow::arrow::buffer::Buffer; use common_arrow::arrow::trusted_len::TrustedLen; use enum_as_inner::EnumAsInner; use itertools::Itertools; -use ordered_float::NotNan; +use ordered_float::OrderedFloat; use serde::Deserialize; use serde::Serialize; use crate::property::BooleanDomain; use crate::property::Domain; -use crate::property::FloatDomain; -use crate::property::IntDomain; use crate::property::NullableDomain; use crate::property::StringDomain; -use crate::property::UIntDomain; use crate::types::array::ArrayColumn; use crate::types::array::ArrayColumnBuilder; use crate::types::nullable::NullableColumn; @@ -42,20 +39,21 @@ use crate::util::append_bitmap; use crate::util::bitmap_into_mut; use crate::util::buffer_into_mut; use crate::util::constant_bitmap; +use crate::NumberDomain; -#[derive(EnumAsInner, Clone)] +#[derive(Debug, Clone, EnumAsInner)] pub enum Value { Scalar(T::Scalar), Column(T::Column), } -#[derive(EnumAsInner)] +#[derive(Debug, Clone, EnumAsInner)] pub enum ValueRef<'a, T: ValueType> { Scalar(T::ScalarRef<'a>), Column(T::Column), } -#[derive(Debug, Clone, Default, EnumAsInner, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Default, EnumAsInner, Serialize, Deserialize)] pub enum Scalar { #[default] Null, @@ -77,7 +75,7 @@ pub enum Scalar { Tuple(Vec), } -#[derive(Debug, Clone, Default, EnumAsInner)] +#[derive(Clone, PartialEq, Default, EnumAsInner)] pub enum ScalarRef<'a> { #[default] Null, @@ 
-119,7 +117,7 @@ pub enum Column { Tuple { fields: Vec, len: usize }, } -#[derive(Debug, Clone, EnumAsInner)] +#[derive(Debug, Clone, PartialEq, EnumAsInner)] pub enum ColumnBuilder { Null { len: usize, @@ -154,6 +152,23 @@ impl<'a, T: ValueType> ValueRef<'a, T> { ValueRef::Column(col) => Value::Column(col), } } + + pub fn sematically_eq(&'a self, other: &'a Self) -> bool { + match (self, other) { + (ValueRef::Scalar(s1), ValueRef::Scalar(s2)) => s1 == s2, + (ValueRef::Column(c1), ValueRef::Column(c2)) => c1 == c2, + (ValueRef::Scalar(s), ValueRef::Column(c)) + | (ValueRef::Column(c), ValueRef::Scalar(s)) => { + for scalar in T::iter_column(c) { + if scalar != *s { + return false; + } + } + true + // T::iter_column(c).all(|scalar| &scalar == s) + } + } + } } impl<'a, T: ValueType> Value { @@ -188,13 +203,11 @@ impl<'a> ValueRef<'a, AnyType> { ValueRef::Column(col) => col.index(index), } } -} -impl<'a, T: ValueType> Clone for ValueRef<'a, T> { - fn clone(&self) -> Self { + pub fn domain(&self) -> Domain { match self { - ValueRef::Scalar(scalar) => ValueRef::Scalar(scalar.clone()), - ValueRef::Column(col) => ValueRef::Column(col.clone()), + ValueRef::Scalar(scalar) => scalar.domain(), + ValueRef::Column(col) => col.domain(), } } } @@ -271,6 +284,42 @@ impl<'a> ScalarRef<'a> { }, } } + + pub fn domain(&self) -> Domain { + match self { + ScalarRef::Null => Domain::Nullable(NullableDomain { + has_null: true, + value: None, + }), + ScalarRef::EmptyArray => Domain::Array(None), + ScalarRef::Int8(i) => Domain::Int8(NumberDomain { min: *i, max: *i }), + ScalarRef::Int16(i) => Domain::Int16(NumberDomain { min: *i, max: *i }), + ScalarRef::Int32(i) => Domain::Int32(NumberDomain { min: *i, max: *i }), + ScalarRef::Int64(i) => Domain::Int64(NumberDomain { min: *i, max: *i }), + ScalarRef::UInt8(i) => Domain::UInt8(NumberDomain { min: *i, max: *i }), + ScalarRef::UInt16(i) => Domain::UInt16(NumberDomain { min: *i, max: *i }), + ScalarRef::UInt32(i) => Domain::UInt32(NumberDomain { min: *i, max: *i }), + ScalarRef::UInt64(i) => Domain::UInt64(NumberDomain { min: *i, max: *i }), + ScalarRef::Float32(i) => Domain::Float32(NumberDomain { min: *i, max: *i }), + ScalarRef::Float64(i) => Domain::Float64(NumberDomain { min: *i, max: *i }), + ScalarRef::Boolean(true) => Domain::Boolean(BooleanDomain { + has_false: false, + has_true: true, + }), + ScalarRef::Boolean(false) => Domain::Boolean(BooleanDomain { + has_false: true, + has_true: false, + }), + ScalarRef::String(s) => Domain::String(StringDomain { + min: s.to_vec(), + max: Some(s.to_vec()), + }), + ScalarRef::Array(array) => Domain::Array(Some(Box::new(array.domain()))), + ScalarRef::Tuple(fields) => { + Domain::Tuple(fields.iter().map(|field| field.domain()).collect()) + } + } + } } impl Column { @@ -401,87 +450,82 @@ impl Column { Column::EmptyArray { .. 
} => Domain::Array(None), Column::Int8(col) => { let (min, max) = col.iter().minmax().into_option().unwrap(); - Domain::Int(IntDomain { - min: *min as i64, - max: *max as i64, + Domain::Int8(NumberDomain { + min: *min, + max: *max, }) } Column::Int16(col) => { let (min, max) = col.iter().minmax().into_option().unwrap(); - Domain::Int(IntDomain { - min: *min as i64, - max: *max as i64, + Domain::Int16(NumberDomain { + min: *min, + max: *max, }) } Column::Int32(col) => { let (min, max) = col.iter().minmax().into_option().unwrap(); - Domain::Int(IntDomain { - min: *min as i64, - max: *max as i64, + Domain::Int32(NumberDomain { + min: *min, + max: *max, }) } Column::Int64(col) => { let (min, max) = col.iter().minmax().into_option().unwrap(); - Domain::Int(IntDomain { - min: *min as i64, - max: *max as i64, + Domain::Int64(NumberDomain { + min: *min, + max: *max, }) } Column::UInt8(col) => { let (min, max) = col.iter().minmax().into_option().unwrap(); - Domain::UInt(UIntDomain { - min: *min as u64, - max: *max as u64, + Domain::UInt8(NumberDomain { + min: *min, + max: *max, }) } Column::UInt16(col) => { let (min, max) = col.iter().minmax().into_option().unwrap(); - Domain::UInt(UIntDomain { - min: *min as u64, - max: *max as u64, + Domain::UInt16(NumberDomain { + min: *min, + max: *max, }) } Column::UInt32(col) => { let (min, max) = col.iter().minmax().into_option().unwrap(); - Domain::UInt(UIntDomain { - min: *min as u64, - max: *max as u64, + Domain::UInt32(NumberDomain { + min: *min, + max: *max, }) } Column::UInt64(col) => { let (min, max) = col.iter().minmax().into_option().unwrap(); - Domain::UInt(UIntDomain { - min: *min as u64, - max: *max as u64, + Domain::UInt64(NumberDomain { + min: *min, + max: *max, }) } Column::Float32(col) => { - // TODO: may panic if all values are NaN let (min, max) = col .iter() .cloned() - .map(NotNan::new) - .filter_map(Result::ok) + .map(OrderedFloat::from) .minmax() .into_option() .unwrap(); - Domain::Float(FloatDomain { - // Cast to f32 and then to f64 to round to the nearest f32 value. 
- min: *min as f32 as f64, - max: *max as f32 as f64, + Domain::Float32(NumberDomain { + min: *min, + max: *max, }) } Column::Float64(col) => { - // TODO: may panic if all values are NaN let (min, max) = col .iter() .cloned() - .map(NotNan::new) - .filter_map(Result::ok) + .map(OrderedFloat::from) .minmax() .into_option() .unwrap(); - Domain::Float(FloatDomain { + Domain::Float64(NumberDomain { min: *min, max: *max, }) diff --git a/common/expression/tests/it/main.rs b/common/expression/tests/it/main.rs index 74a3b37c90753..b5bb3e9ef2a57 100644 --- a/common/expression/tests/it/main.rs +++ b/common/expression/tests/it/main.rs @@ -36,17 +36,16 @@ use common_expression::BooleanDomain; use common_expression::Chunk; use common_expression::Column; use common_expression::ColumnBuilder; +use common_expression::ConstantFolder; use common_expression::Domain; -use common_expression::DomainCalculator; use common_expression::Evaluator; -use common_expression::FloatDomain; use common_expression::Function; use common_expression::FunctionContext; use common_expression::FunctionProperty; use common_expression::FunctionRegistry; use common_expression::FunctionSignature; -use common_expression::IntDomain; use common_expression::NullableDomain; +use common_expression::NumberDomain; use common_expression::RemoteExpr; use common_expression::Scalar; use common_expression::ScalarRef; @@ -55,12 +54,14 @@ use common_expression::ValueRef; use goldenfile::Mint; use parser::parse_raw_expr; +// Deprecate: move tests to `common_function_v2` #[test] pub fn test_pass() { let mut mint = Mint::new("tests/it/testdata"); let mut file = mint.new_goldenfile("run-pass.txt").unwrap(); run_ast(&mut file, "true AND false", &[]); + run_ast(&mut file, "CAST(false AS BOOLEAN NULL)", &[]); run_ast(&mut file, "null AND false", &[]); run_ast(&mut file, "plus(a, 10)", &[( "a", @@ -626,9 +627,9 @@ fn builtin_functions() -> FunctionRegistry { "plus", FunctionProperty::default(), |lhs, rhs| { - Some(IntDomain { - min: lhs.min.checked_add(rhs.min).unwrap_or(i16::MAX as i64), - max: lhs.max.checked_add(rhs.max).unwrap_or(i16::MAX as i64), + Some(NumberDomain { + min: lhs.min.checked_add(rhs.min).unwrap_or(i16::MAX), + max: lhs.max.checked_add(rhs.max).unwrap_or(i16::MAX), }) }, |lhs, rhs| lhs + rhs, @@ -638,9 +639,9 @@ fn builtin_functions() -> FunctionRegistry { "minus", FunctionProperty::default(), |lhs, rhs| { - Some(IntDomain { - min: lhs.min.checked_sub(rhs.max).unwrap_or(i32::MAX as i64), - max: lhs.max.checked_sub(rhs.min).unwrap_or(i32::MAX as i64), + Some(NumberDomain { + min: lhs.min.checked_sub(rhs.max).unwrap_or(i32::MAX), + max: lhs.max.checked_sub(rhs.min).unwrap_or(i32::MAX), }) }, |lhs, rhs| lhs - rhs, @@ -664,7 +665,7 @@ fn builtin_functions() -> FunctionRegistry { "avg", FunctionProperty::default(), |lhs, rhs| { - Some(FloatDomain { + Some(NumberDomain { min: (lhs.min + rhs.min) / 2.0, max: (lhs.max + rhs.max) / 2.0, }) @@ -695,15 +696,15 @@ fn builtin_functions() -> FunctionRegistry { calc_domain: Box::new(|args_domain, _| { let min = args_domain .iter() - .map(|domain| domain.as_int().unwrap().min) + .map(|domain| domain.as_int16().unwrap().min) .min() .unwrap_or(0); let max = args_domain .iter() - .map(|domain| domain.as_int().unwrap().max) + .map(|domain| domain.as_int16().unwrap().max) .min() .unwrap_or(0); - Domain::Int(IntDomain { min, max }) + Some(Domain::Int16(NumberDomain { min, max })) }), eval: Box::new(|args, generics| { if args.is_empty() { @@ -735,7 +736,7 @@ fn builtin_functions() -> FunctionRegistry { 
registry.register_0_arg_core::( "create_array", FunctionProperty::default(), - || None, + || Some(()), |_| Ok(Value::Scalar(())), ); @@ -748,9 +749,9 @@ fn builtin_functions() -> FunctionRegistry { property: FunctionProperty::default(), }, calc_domain: Box::new(|args_domain, _| { - args_domain.iter().fold(Domain::Array(None), |acc, x| { + Some(args_domain.iter().fold(Domain::Array(None), |acc, x| { acc.merge(&Domain::Array(Some(Box::new(x.clone())))) - }) + })) }), eval: Box::new(|args, generics| { let len = args.iter().find_map(|arg| match arg { @@ -798,7 +799,7 @@ fn builtin_functions() -> FunctionRegistry { registry.register_passthrough_nullable_2_arg::>, NumberType, GenericType<0>,_, _>( "get", FunctionProperty::default(), - |item_domain, _| Some(item_domain.clone()), + |_, _| None, vectorize_with_writer_2_arg::>, NumberType, GenericType<0>>( |array, idx, output| { let item = array @@ -817,7 +818,7 @@ fn builtin_functions() -> FunctionRegistry { return_type: DataType::Tuple(args_type.to_vec()), property: FunctionProperty::default(), }, - calc_domain: Box::new(|args_domain, _| Domain::Tuple(args_domain.to_vec())), + calc_domain: Box::new(|args_domain, _| Some(Domain::Tuple(args_domain.to_vec()))), eval: Box::new(move |args, _generics| { let len = args.iter().find_map(|arg| match arg { ValueRef::Column(col) => Some(col.len()), @@ -865,7 +866,7 @@ fn builtin_functions() -> FunctionRegistry { property: FunctionProperty::default(), }, calc_domain: Box::new(move |args_domain, _| { - args_domain[0].as_tuple().unwrap()[idx].clone() + Some(args_domain[0].as_tuple().unwrap()[idx].clone()) }), eval: Box::new(move |args, _| match &args[0] { ValueRef::Scalar(ScalarRef::Tuple(fields)) => { @@ -904,10 +905,10 @@ fn builtin_functions() -> FunctionRegistry { let fields = value.as_tuple().unwrap(); Box::new(fields[idx].clone()) }); - Domain::Nullable(NullableDomain { + Some(Domain::Nullable(NullableDomain { has_null: *has_null, value, - }) + })) }), eval: Box::new(move |args, _| match &args[0] { ValueRef::Scalar(ScalarRef::Null) => Ok(Value::Scalar(Scalar::Null)), @@ -950,8 +951,8 @@ fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Column .map(|(_, _, col)| col.domain()) .collect::>(); - let domain_calculator = DomainCalculator::new(input_domains.clone()); - let output_domain = domain_calculator.calculate(&expr)?; + let constant_folder = ConstantFolder::new(&input_domains); + let (optimized_expr, output_domain) = constant_folder.fold(&expr); let num_rows = columns.iter().map(|col| col.2.len()).max().unwrap_or(0); let chunk = Chunk::new( @@ -967,26 +968,42 @@ fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Column }); let evaluator = Evaluator { - input_columns: chunk, + input_columns: &chunk, context: FunctionContext::default(), }; - let result = evaluator.run(&expr)?; + let result = evaluator.run(&expr); + let optimized_result = evaluator.run(&optimized_expr); + match &result { + Ok(result) => assert!( + result + .as_ref() + .sematically_eq(&optimized_result.unwrap().as_ref()) + ), + Err(e) => assert_eq!(e, &optimized_result.unwrap_err()), + } ( raw_expr, expr, input_domains, output_ty, - output_domain, - result, + optimized_expr, + output_domain + .as_ref() + .map(ToString::to_string) + .unwrap_or("Unknown".to_string()), + result?, ) }; match result { - Ok((raw_expr, expr, input_domains, output_ty, output_domain, result)) => { + Ok((raw_expr, expr, input_domains, output_ty, optimized_expr, output_domain, result)) => { writeln!(file, "ast : 
{text}").unwrap(); writeln!(file, "raw expr : {raw_expr}").unwrap(); writeln!(file, "checked expr : {expr}").unwrap(); + if optimized_expr != expr { + writeln!(file, "optimized expr : {optimized_expr}").unwrap(); + } match result { Value::Scalar(output_scalar) => { diff --git a/common/expression/tests/it/testdata/run-pass.txt b/common/expression/tests/it/testdata/run-pass.txt index 3b9095e57e9a9..23e1ebce265e1 100644 --- a/common/expression/tests/it/testdata/run-pass.txt +++ b/common/expression/tests/it/testdata/run-pass.txt @@ -1,14 +1,25 @@ ast : true AND false raw expr : and(true, false) checked expr : and(true, false) +optimized expr : false output type : Boolean output domain : {FALSE} output : false +ast : CAST(false AS BOOLEAN NULL) +raw expr : CAST(false AS Boolean NULL) +checked expr : CAST(false AS Boolean NULL) +optimized expr : false +output type : Boolean NULL +output domain : {FALSE} +output : false + + ast : null AND false raw expr : and(NULL, false) checked expr : and(NULL, CAST(false AS Boolean NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -16,7 +27,8 @@ output : NULL ast : plus(a, 10) raw expr : plus(ColumnRef(0)::UInt8 NULL, 10_u8) -checked expr : plus(CAST(ColumnRef(0) AS Int16 NULL), CAST(10 AS Int16 NULL)) +checked expr : plus(CAST(ColumnRef(0) AS Int16 NULL), CAST(10_u8 AS Int16 NULL)) +optimized expr : plus(CAST(ColumnRef(0) AS Int16 NULL), 10_i16) evaluation: +--------+--------------------+--------------------+ | | a | Output | @@ -62,6 +74,7 @@ evaluation (internal): ast : plus(a, b) raw expr : plus(ColumnRef(0)::UInt8 NULL, ColumnRef(1)::NULL) checked expr : plus(CAST(ColumnRef(0) AS Int16 NULL), ColumnRef(1)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -69,7 +82,8 @@ output : NULL ast : minus(a, 10) raw expr : minus(ColumnRef(0)::UInt8 NULL, 10_u8) -checked expr : minus(CAST(ColumnRef(0) AS Int32 NULL), CAST(10 AS Int32 NULL)) +checked expr : minus(CAST(ColumnRef(0) AS Int32 NULL), CAST(10_u8 AS Int32 NULL)) +optimized expr : minus(CAST(ColumnRef(0) AS Int32 NULL), 10_i32) evaluation: +--------+--------------------+------------------+ | | a | Output | @@ -115,6 +129,7 @@ evaluation (internal): ast : minus(a, b) raw expr : minus(ColumnRef(0)::UInt8 NULL, ColumnRef(1)::NULL) checked expr : minus(CAST(ColumnRef(0) AS Int32 NULL), ColumnRef(1)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -122,17 +137,18 @@ output : NULL ast : multiply(a, 10) raw expr : multiply(ColumnRef(0)::UInt8 NULL, 10_u8) -checked expr : multiply(CAST(ColumnRef(0) AS Int64 NULL), CAST(10 AS Int64 NULL)) +checked expr : multiply(CAST(ColumnRef(0) AS Int64 NULL), CAST(10_u8 AS Int64 NULL)) +optimized expr : multiply(CAST(ColumnRef(0) AS Int64 NULL), 10_i64) evaluation: -+--------+--------------------+-------------------------------------------------------+ -| | a | Output | -+--------+--------------------+-------------------------------------------------------+ -| Type | UInt8 NULL | Int64 NULL | -| Domain | {10..=12} ∪ {NULL} | {-9223372036854775808..=9223372036854775807} ∪ {NULL} | -| Row 0 | NULL | NULL | -| Row 1 | 11 | 110 | -| Row 2 | NULL | NULL | -+--------+--------------------+-------------------------------------------------------+ ++--------+--------------------+------------+ +| | a | Output | ++--------+--------------------+------------+ +| Type | UInt8 NULL | Int64 NULL | +| Domain | {10..=12} ∪ {NULL} | Unknown | +| Row 0 | NULL | NULL | +| Row 1 | 11 | 110 | +| Row 
2 | NULL | NULL | ++--------+--------------------+------------+ evaluation (internal): +--------+-------------------------------------------------------------------------------------+ | Column | Data | @@ -146,15 +162,15 @@ ast : multiply(a, b) raw expr : multiply(ColumnRef(0)::UInt16 NULL, ColumnRef(1)::Int16 NULL) checked expr : multiply(CAST(ColumnRef(0) AS Int64 NULL), CAST(ColumnRef(1) AS Int64 NULL)) evaluation: -+--------+--------------------+------------------+-------------------------------------------------------+ -| | a | b | Output | -+--------+--------------------+------------------+-------------------------------------------------------+ -| Type | UInt16 NULL | Int16 NULL | Int64 NULL | -| Domain | {10..=12} ∪ {NULL} | {1..=3} ∪ {NULL} | {-9223372036854775808..=9223372036854775807} ∪ {NULL} | -| Row 0 | NULL | NULL | NULL | -| Row 1 | 11 | 2 | 22 | -| Row 2 | NULL | 3 | NULL | -+--------+--------------------+------------------+-------------------------------------------------------+ ++--------+--------------------+------------------+------------+ +| | a | b | Output | ++--------+--------------------+------------------+------------+ +| Type | UInt16 NULL | Int16 NULL | Int64 NULL | +| Domain | {10..=12} ∪ {NULL} | {1..=3} ∪ {NULL} | Unknown | +| Row 0 | NULL | NULL | NULL | +| Row 1 | 11 | 2 | 22 | +| Row 2 | NULL | 3 | NULL | ++--------+--------------------+------------------+------------+ evaluation (internal): +--------+-----------------------------------------------------------------------------------+ | Column | Data | @@ -169,15 +185,15 @@ ast : multiply(a, b) raw expr : multiply(ColumnRef(0)::UInt32 NULL, ColumnRef(1)::Int32 NULL) checked expr : multiply(CAST(ColumnRef(0) AS Int64 NULL), CAST(ColumnRef(1) AS Int64 NULL)) evaluation: -+--------+--------------------+------------------+-------------------------------------------------------+ -| | a | b | Output | -+--------+--------------------+------------------+-------------------------------------------------------+ -| Type | UInt32 NULL | Int32 NULL | Int64 NULL | -| Domain | {10..=12} ∪ {NULL} | {1..=3} ∪ {NULL} | {-9223372036854775808..=9223372036854775807} ∪ {NULL} | -| Row 0 | NULL | NULL | NULL | -| Row 1 | 11 | 2 | 22 | -| Row 2 | NULL | 3 | NULL | -+--------+--------------------+------------------+-------------------------------------------------------+ ++--------+--------------------+------------------+------------+ +| | a | b | Output | ++--------+--------------------+------------------+------------+ +| Type | UInt32 NULL | Int32 NULL | Int64 NULL | +| Domain | {10..=12} ∪ {NULL} | {1..=3} ∪ {NULL} | Unknown | +| Row 0 | NULL | NULL | NULL | +| Row 1 | 11 | 2 | 22 | +| Row 2 | NULL | 3 | NULL | ++--------+--------------------+------------------+------------+ evaluation (internal): +--------+-----------------------------------------------------------------------------------+ | Column | Data | @@ -191,6 +207,7 @@ evaluation (internal): ast : multiply(a, b) raw expr : multiply(ColumnRef(0)::UInt8 NULL, ColumnRef(1)::NULL) checked expr : multiply(CAST(ColumnRef(0) AS Int64 NULL), ColumnRef(1)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -198,17 +215,18 @@ output : NULL ast : divide(a, 10) raw expr : divide(ColumnRef(0)::UInt8 NULL, 10_u8) -checked expr : divide(CAST(ColumnRef(0) AS Float32 NULL), CAST(10 AS Float32 NULL)) +checked expr : divide(CAST(ColumnRef(0) AS Float32 NULL), CAST(10_u8 AS Float32 NULL)) +optimized expr : divide(CAST(ColumnRef(0) AS Float32 NULL), 
10.0_f32) evaluation: -+--------+--------------------+-----------------------+ -| | a | Output | -+--------+--------------------+-----------------------+ -| Type | UInt8 NULL | Float32 NULL | -| Domain | {10..=12} ∪ {NULL} | {-inf..=inf} ∪ {NULL} | -| Row 0 | NULL | NULL | -| Row 1 | 11 | 1.1 | -| Row 2 | NULL | NULL | -+--------+--------------------+-----------------------+ ++--------+--------------------+--------------+ +| | a | Output | ++--------+--------------------+--------------+ +| Type | UInt8 NULL | Float32 NULL | +| Domain | {10..=12} ∪ {NULL} | Unknown | +| Row 0 | NULL | NULL | +| Row 1 | 11 | 1.1 | +| Row 2 | NULL | NULL | ++--------+--------------------+--------------+ evaluation (internal): +--------+---------------------------------------------------------------------------------------+ | Column | Data | @@ -222,15 +240,15 @@ ast : divide(a, b) raw expr : divide(ColumnRef(0)::UInt16 NULL, ColumnRef(1)::Int16 NULL) checked expr : divide(CAST(ColumnRef(0) AS Float32 NULL), CAST(ColumnRef(1) AS Float32 NULL)) evaluation: -+--------+--------------------+------------------+-----------------------+ -| | a | b | Output | -+--------+--------------------+------------------+-----------------------+ -| Type | UInt16 NULL | Int16 NULL | Float32 NULL | -| Domain | {10..=12} ∪ {NULL} | {1..=3} ∪ {NULL} | {-inf..=inf} ∪ {NULL} | -| Row 0 | NULL | NULL | NULL | -| Row 1 | 11 | 2 | 5.5 | -| Row 2 | NULL | 3 | NULL | -+--------+--------------------+------------------+-----------------------+ ++--------+--------------------+------------------+--------------+ +| | a | b | Output | ++--------+--------------------+------------------+--------------+ +| Type | UInt16 NULL | Int16 NULL | Float32 NULL | +| Domain | {10..=12} ∪ {NULL} | {1..=3} ∪ {NULL} | Unknown | +| Row 0 | NULL | NULL | NULL | +| Row 1 | 11 | 2 | 5.5 | +| Row 2 | NULL | 3 | NULL | ++--------+--------------------+------------------+--------------+ evaluation (internal): +--------+----------------------------------------------------------------------------------------+ | Column | Data | @@ -244,6 +262,7 @@ evaluation (internal): ast : divide(a, b) raw expr : divide(ColumnRef(0)::UInt8 NULL, ColumnRef(1)::NULL) checked expr : divide(CAST(ColumnRef(0) AS Float32 NULL), ColumnRef(1)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -251,7 +270,8 @@ output : NULL ast : avg(a, 10) raw expr : avg(ColumnRef(0)::UInt8 NULL, 10_u8) -checked expr : avg(CAST(ColumnRef(0) AS Float64 NULL), CAST(10 AS Float64 NULL)) +checked expr : avg(CAST(ColumnRef(0) AS Float64 NULL), CAST(10_u8 AS Float64 NULL)) +optimized expr : avg(CAST(ColumnRef(0) AS Float64 NULL), 10.0_f64) evaluation: +--------+--------------------+------------------------+ | | a | Output | @@ -366,6 +386,7 @@ evaluation (internal): ast : multiply(a, b) raw expr : multiply(ColumnRef(0)::Int8 NULL, ColumnRef(1)::NULL) checked expr : multiply(CAST(ColumnRef(0) AS Int64 NULL), ColumnRef(1)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -396,6 +417,7 @@ evaluation (internal): ast : NOT a raw expr : not(ColumnRef(0)::NULL) checked expr : not(ColumnRef(0)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -403,7 +425,8 @@ output : NULL ast : least(10, CAST(20 as Int8), 30, 40) raw expr : least(10_u8, CAST(20_u8 AS Int8), 30_u8, 40_u8) -checked expr : least(CAST(10 AS Int16), CAST(CAST(20 AS Int8) AS Int16), CAST(30 AS Int16), CAST(40 AS Int16)) +checked expr : least(CAST(10_u8 AS Int16), 
CAST(CAST(20_u8 AS Int8) AS Int16), CAST(30_u8 AS Int16), CAST(40_u8 AS Int16)) +optimized expr : 10_i16 output type : Int16 output domain : {10..=10} output : 10 @@ -412,6 +435,7 @@ output : 10 ast : create_tuple(null, true) raw expr : create_tuple(NULL, true) checked expr : create_tuple(NULL, true) +optimized expr : (NULL, true) output type : (NULL, Boolean) output domain : ({NULL}, {TRUE}) output : (NULL, true) @@ -470,6 +494,7 @@ evaluation (internal): ast : create_array() raw expr : create_array() checked expr : create_array<>() +optimized expr : [] output type : Array(Nothing) output domain : [] output : [] @@ -478,6 +503,7 @@ output : [] ast : create_array(null, true) raw expr : create_array(NULL, true) checked expr : create_array(CAST(NULL AS Boolean NULL), CAST(true AS Boolean NULL)) +optimized expr : create_array(NULL, true) output type : Array(Boolean NULL) output domain : [{TRUE} ∪ {NULL}] output : [NULL, true] @@ -511,6 +537,7 @@ evaluation (internal): ast : create_array(create_array(a, b), null, null) raw expr : create_array(create_array(ColumnRef(0)::Int16, ColumnRef(1)::Int16), NULL, NULL) checked expr : create_array(CAST(create_array(ColumnRef(0), ColumnRef(1)) AS Array(Int16) NULL), CAST(NULL AS Array(Int16) NULL), CAST(NULL AS Array(Int16) NULL)) +optimized expr : create_array(CAST(create_array(ColumnRef(0), ColumnRef(1)) AS Array(Int16) NULL), NULL, NULL) evaluation: +--------+---------+---------+--------------------------+ | | a | b | Output | @@ -537,17 +564,17 @@ ast : get(a, b) raw expr : get(ColumnRef(0)::Array(Int16), ColumnRef(1)::UInt8) checked expr : get(ColumnRef(0), CAST(ColumnRef(1) AS Int16)) evaluation: -+--------+----------------------------------------------------------------------------------+---------+----------+ -| | a | b | Output | -+--------+----------------------------------------------------------------------------------+---------+----------+ -| Type | Array(Int16) | UInt8 | Int16 | -| Domain | [{0..=99}] | {0..=4} | {0..=99} | -| Row 0 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] | 0 | 0 | -| Row 1 | [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] | 1 | 21 | -| Row 2 | [40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59] | 2 | 42 | -| Row 3 | [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79] | 3 | 63 | -| Row 4 | [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] | 4 | 84 | -+--------+----------------------------------------------------------------------------------+---------+----------+ ++--------+----------------------------------------------------------------------------------+---------+---------+ +| | a | b | Output | ++--------+----------------------------------------------------------------------------------+---------+---------+ +| Type | Array(Int16) | UInt8 | Int16 | +| Domain | [{0..=99}] | {0..=4} | Unknown | +| Row 0 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] | 0 | 0 | +| Row 1 | [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] | 1 | 21 | +| Row 2 | [40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59] | 2 | 42 | +| Row 3 | [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79] | 3 | 63 | +| Row 4 | [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99] | 4 | 84 | 
++--------+----------------------------------------------------------------------------------+---------+---------+ evaluation (internal): +--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -566,7 +593,7 @@ evaluation: | | a | b | Output | +--------+----------------------------------------------------------------------------------------------------------------+---------+----------------------+ | Type | Array(Array(Int16)) | UInt8 | Array(Int16) | -| Domain | [[{0..=99}]] | {0..=4} | [{0..=99}] | +| Domain | [[{0..=99}]] | {0..=4} | Unknown | | Row 0 | [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19]] | 0 | [0, 1, 2, 3, 4] | | Row 1 | [[20, 21, 22, 23, 24], [25, 26, 27, 28, 29], [30, 31, 32, 33, 34], [35, 36, 37, 38, 39]] | 1 | [25, 26, 27, 28, 29] | | Row 2 | [[40, 41, 42, 43, 44], [45, 46, 47, 48, 49], [50, 51, 52, 53, 54]] | 2 | [50, 51, 52, 53, 54] | @@ -659,18 +686,18 @@ ast : create_tuple(TRY_CAST(a AS FLOAT32), TRY_CAST(a AS INT32), TRY_ raw expr : create_tuple(TRY_CAST(ColumnRef(0)::UInt64 AS Float32), TRY_CAST(ColumnRef(0)::UInt64 AS Int32), TRY_CAST(ColumnRef(1)::Float64 AS Float32), TRY_CAST(ColumnRef(1)::Float64 AS Int32)) checked expr : create_tuple(TRY_CAST(ColumnRef(0) AS Float32 NULL), TRY_CAST(ColumnRef(0) AS Int32 NULL), TRY_CAST(ColumnRef(1) AS Float32 NULL), TRY_CAST(ColumnRef(1) AS Int32 NULL)) evaluation: -+--------+----------------------------+---------------------------------+---------------------------------------------------------------------------------------------------------------+ -| | a | b | Output | -+--------+----------------------------+---------------------------------+---------------------------------------------------------------------------------------------------------------+ -| Type | UInt64 | Float64 | (Float32 NULL, Int32 NULL, Float32 NULL, Int32 NULL) | -| Domain | {0..=18446744073709551615} | {-1.7976931348623157e308..=inf} | ({0.0..=1.8446744073709552e19}, {0..=2147483647} ∪ {NULL}, {-inf..=inf}, {-2147483648..=2147483647} ∪ {NULL}) | -| Row 0 | 0 | 0.0 | (0.0, 0, 0.0, 0) | -| Row 1 | 1 | 4294967295.0 | (1.0, 1, 4294967300.0, NULL) | -| Row 2 | 255 | 1.8446744073709552e19 | (255.0, 255, 1.8446744e19, NULL) | -| Row 3 | 65535 | -1.7976931348623157e308 | (65535.0, 65535, -inf, NULL) | -| Row 4 | 4294967295 | 1.7976931348623157e308 | (4294967300.0, NULL, inf, NULL) | -| Row 5 | 18446744073709551615 | inf | (1.8446744e19, NULL, inf, NULL) | -+--------+----------------------------+---------------------------------+---------------------------------------------------------------------------------------------------------------+ ++--------+----------------------------+---------------------------------+------------------------------------------------------------------------------------------------------+ +| | a | b | Output | ++--------+----------------------------+---------------------------------+------------------------------------------------------------------------------------------------------+ +| Type | UInt64 | Float64 | (Float32 NULL, Int32 NULL, Float32 NULL, Int32 NULL) | +| 
Domain | {0..=18446744073709551615} | {-1.7976931348623157e308..=inf} | ({0.0..=1.8446744e19}, {0..=2147483647} ∪ {NULL}, {-inf..=inf}, {-2147483648..=2147483647} ∪ {NULL}) | +| Row 0 | 0 | 0.0 | (0.0, 0, 0.0, 0) | +| Row 1 | 1 | 4294967295.0 | (1.0, 1, 4294967300.0, NULL) | +| Row 2 | 255 | 1.8446744073709552e19 | (255.0, 255, 1.8446744e19, NULL) | +| Row 3 | 65535 | -1.7976931348623157e308 | (65535.0, 65535, -inf, NULL) | +| Row 4 | 4294967295 | 1.7976931348623157e308 | (4294967300.0, NULL, inf, NULL) | +| Row 5 | 18446744073709551615 | inf | (1.8446744e19, NULL, inf, NULL) | ++--------+----------------------------+---------------------------------+------------------------------------------------------------------------------------------------------+ evaluation (internal): +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -684,6 +711,7 @@ evaluation (internal): ast : TRY_CAST(create_array(create_array(a, b), null, null) AS Array(Array(Int8))) raw expr : TRY_CAST(create_array(create_array(ColumnRef(0)::Int16, ColumnRef(1)::Int16), NULL, NULL) AS Array(Array(Int8))) checked expr : TRY_CAST(create_array(CAST(create_array(ColumnRef(0), ColumnRef(1)) AS Array(Int16) NULL), CAST(NULL AS Array(Int16) NULL), CAST(NULL AS Array(Int16) NULL)) AS Array(Array(Int8 NULL) NULL) NULL) +optimized expr : TRY_CAST(create_array(CAST(create_array(ColumnRef(0), ColumnRef(1)) AS Array(Int16) NULL), NULL, NULL) AS Array(Array(Int8 NULL) NULL) NULL) evaluation: +--------+-----------+------------+------------------------------------+ | | a | b | Output | diff --git a/common/functions-v2/src/scalars/control.rs b/common/functions-v2/src/scalars/control.rs index 75e96421feee1..9d2cb5c7eee61 100644 --- a/common/functions-v2/src/scalars/control.rs +++ b/common/functions-v2/src/scalars/control.rs @@ -55,7 +55,7 @@ pub fn register(registry: &mut FunctionRegistry) { has_false: false, }), ) => { - return args_domain[cond_idx + 1].clone(); + return Some(args_domain[cond_idx + 1].clone()); } ( None, @@ -82,7 +82,7 @@ pub fn register(registry: &mut FunctionRegistry) { has_false: false, }), ) => { - return prev_domain.merge(&args_domain[cond_idx + 1]); + return Some(prev_domain.merge(&args_domain[cond_idx + 1])); } ( Some(_), @@ -106,10 +106,10 @@ pub fn register(registry: &mut FunctionRegistry) { } } - match domain { + Some(match domain { Some(domain) => domain.merge(args_domain.last().unwrap()), None => args_domain.last().unwrap().clone(), - } + }) }), eval: Box::new(|args, generics| { let len = args.iter().find_map(|arg| match arg { diff --git a/common/functions-v2/src/scalars/string.rs b/common/functions-v2/src/scalars/string.rs index 5696180755af2..8178bcba0cb6c 100644 --- a/common/functions-v2/src/scalars/string.rs +++ b/common/functions-v2/src/scalars/string.rs @@ -22,7 +22,7 @@ use common_expression::types::NumberType; use common_expression::types::StringType; use common_expression::FunctionProperty; use common_expression::FunctionRegistry; -use common_expression::UIntDomain; +use common_expression::NumberDomain; use common_expression::Value; use common_expression::ValueRef; @@ -182,13 
+182,13 @@ pub fn register(registry: &mut FunctionRegistry) { "ascii", FunctionProperty::default(), |domain| { - Some(UIntDomain { - min: domain.min.first().cloned().unwrap_or_default() as u64, + Some(NumberDomain { + min: domain.min.first().cloned().unwrap_or(0), max: domain .max .as_ref() .map(|v| v.first().cloned().unwrap_or_default()) - .unwrap_or(u8::MAX) as u64, + .unwrap_or(u8::MAX), }) }, |val| val.first().cloned().unwrap_or_default(), diff --git a/common/functions-v2/tests/it/scalars/mod.rs b/common/functions-v2/tests/it/scalars/mod.rs index 43149b954f71d..f57753da4c049 100644 --- a/common/functions-v2/tests/it/scalars/mod.rs +++ b/common/functions-v2/tests/it/scalars/mod.rs @@ -20,7 +20,7 @@ use common_expression::type_check; use common_expression::types::DataType; use common_expression::Chunk; use common_expression::Column; -use common_expression::DomainCalculator; +use common_expression::ConstantFolder; use common_expression::Evaluator; use common_expression::FunctionContext; use common_expression::RemoteExpr; @@ -54,8 +54,8 @@ pub fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Co .map(|(_, _, col)| col.domain()) .collect::>(); - let domain_calculator = DomainCalculator::new(input_domains.clone()); - let output_domain = domain_calculator.calculate(&expr)?; + let constant_folder = ConstantFolder::new(&input_domains); + let (optimized_expr, output_domain) = constant_folder.fold(&expr); let num_rows = columns.iter().map(|col| col.2.len()).max().unwrap_or(0); let chunk = Chunk::new( @@ -71,26 +71,42 @@ pub fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Co }); let evaluator = Evaluator { - input_columns: chunk, + input_columns: &chunk, context: FunctionContext::default(), }; - let result = evaluator.run(&expr)?; + let result = evaluator.run(&expr); + let optimized_result = evaluator.run(&optimized_expr); + match &result { + Ok(result) => assert!( + result + .as_ref() + .sematically_eq(&optimized_result.unwrap().as_ref()) + ), + Err(e) => assert_eq!(e, &optimized_result.unwrap_err()), + } ( raw_expr, expr, input_domains, output_ty, - output_domain, - result, + optimized_expr, + output_domain + .as_ref() + .map(ToString::to_string) + .unwrap_or_else(|| "Unknown".to_string()), + result?, ) }; match result { - Ok((raw_expr, expr, input_domains, output_ty, output_domain, result)) => { + Ok((raw_expr, expr, input_domains, output_ty, optimized_expr, output_domain, result)) => { writeln!(file, "ast : {text}").unwrap(); writeln!(file, "raw expr : {raw_expr}").unwrap(); writeln!(file, "checked expr : {expr}").unwrap(); + if optimized_expr != expr { + writeln!(file, "optimized expr : {optimized_expr}").unwrap(); + } match result { Value::Scalar(output_scalar) => { diff --git a/common/functions-v2/tests/it/scalars/testdata/boolean.txt b/common/functions-v2/tests/it/scalars/testdata/boolean.txt index 0e3f1edce3bcb..10515191830f8 100644 --- a/common/functions-v2/tests/it/scalars/testdata/boolean.txt +++ b/common/functions-v2/tests/it/scalars/testdata/boolean.txt @@ -1,6 +1,7 @@ ast : true AND false raw expr : and(true, false) checked expr : and(true, false) +optimized expr : false output type : Boolean output domain : {FALSE} output : false @@ -9,6 +10,7 @@ output : false ast : null AND false raw expr : and(NULL, false) checked expr : and(NULL, CAST(false AS Boolean NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -17,6 +19,7 @@ output : NULL ast : NOT a raw expr : not(ColumnRef(0)::NULL) checked expr : 
not(ColumnRef(0)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -91,6 +94,7 @@ evaluation (internal): ast : true OR false raw expr : or(true, false) checked expr : or(true, false) +optimized expr : true output type : Boolean output domain : {TRUE} output : true @@ -99,6 +103,7 @@ output : true ast : null OR false raw expr : or(NULL, false) checked expr : or(NULL, CAST(false AS Boolean NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -107,6 +112,7 @@ output : NULL ast : true XOR false raw expr : xor(true, false) checked expr : xor(true, false) +optimized expr : true output type : Boolean output domain : {TRUE} output : true @@ -115,6 +121,7 @@ output : true ast : null XOR false raw expr : xor(NULL, false) checked expr : xor(NULL, CAST(false AS Boolean NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL diff --git a/common/functions-v2/tests/it/scalars/testdata/control.txt b/common/functions-v2/tests/it/scalars/testdata/control.txt index ef305354a923b..2f02a31301ec6 100644 --- a/common/functions-v2/tests/it/scalars/testdata/control.txt +++ b/common/functions-v2/tests/it/scalars/testdata/control.txt @@ -1,6 +1,7 @@ ast : multi_if(false, 1, false, 2, NULL) raw expr : multi_if(false, 1_u8, false, 2_u8, NULL) -checked expr : multi_if(false, CAST(1 AS UInt8 NULL), false, CAST(2 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +checked expr : multi_if(false, CAST(1_u8 AS UInt8 NULL), false, CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +optimized expr : NULL output type : UInt8 NULL output domain : {NULL} output : NULL @@ -8,7 +9,8 @@ output : NULL ast : multi_if(true, 1, false, 2, NULL) raw expr : multi_if(true, 1_u8, false, 2_u8, NULL) -checked expr : multi_if(true, CAST(1 AS UInt8 NULL), false, CAST(2 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +checked expr : multi_if(true, CAST(1_u8 AS UInt8 NULL), false, CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +optimized expr : 1_u8 output type : UInt8 NULL output domain : {1..=1} output : 1 @@ -16,7 +18,8 @@ output : 1 ast : multi_if(false, 1, true, 2, NULL) raw expr : multi_if(false, 1_u8, true, 2_u8, NULL) -checked expr : multi_if(false, CAST(1 AS UInt8 NULL), true, CAST(2 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +checked expr : multi_if(false, CAST(1_u8 AS UInt8 NULL), true, CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +optimized expr : 2_u8 output type : UInt8 NULL output domain : {2..=2} output : 2 @@ -24,7 +27,8 @@ output : 2 ast : multi_if(true, 1, true, 2, NULL) raw expr : multi_if(true, 1_u8, true, 2_u8, NULL) -checked expr : multi_if(true, CAST(1 AS UInt8 NULL), true, CAST(2 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +checked expr : multi_if(true, CAST(1_u8 AS UInt8 NULL), true, CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +optimized expr : 1_u8 output type : UInt8 NULL output domain : {1..=1} output : 1 @@ -32,7 +36,8 @@ output : 1 ast : multi_if(true, 1, true, NULL, 2) raw expr : multi_if(true, 1_u8, true, NULL, 2_u8) -checked expr : multi_if(true, CAST(1 AS UInt8 NULL), true, CAST(NULL AS UInt8 NULL), CAST(2 AS UInt8 NULL)) +checked expr : multi_if(true, CAST(1_u8 AS UInt8 NULL), true, CAST(NULL AS UInt8 NULL), CAST(2_u8 AS UInt8 NULL)) +optimized expr : 1_u8 output type : UInt8 NULL output domain : {1..=1} output : 1 @@ -40,7 +45,8 @@ output : 1 ast : multi_if(true, 1, NULL) raw expr : multi_if(true, 1_u8, NULL) -checked expr : multi_if(true, CAST(1 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +checked expr : 
multi_if(true, CAST(1_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +optimized expr : 1_u8 output type : UInt8 NULL output domain : {1..=1} output : 1 @@ -48,7 +54,8 @@ output : 1 ast : multi_if(false, 1, NULL) raw expr : multi_if(false, 1_u8, NULL) -checked expr : multi_if(false, CAST(1 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +checked expr : multi_if(false, CAST(1_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) +optimized expr : NULL output type : UInt8 NULL output domain : {NULL} output : NULL @@ -107,6 +114,7 @@ evaluation (internal): ast : multi_if(cond_a, expr_a, cond_b, expr_b, expr_else) raw expr : multi_if(ColumnRef(0)::Boolean, ColumnRef(1)::Int64, ColumnRef(2)::Boolean, ColumnRef(3)::Int64, ColumnRef(4)::Int64 NULL) checked expr : multi_if(ColumnRef(0), CAST(ColumnRef(1) AS Int64 NULL), ColumnRef(2), CAST(ColumnRef(3) AS Int64 NULL), ColumnRef(4)) +optimized expr : multi_if(ColumnRef(0), CAST(ColumnRef(1) AS Int64 NULL), true, CAST(ColumnRef(3) AS Int64 NULL), ColumnRef(4)) evaluation: +--------+---------------+---------+---------+---------+-------------------+------------+ | | cond_a | expr_a | cond_b | expr_b | expr_else | Output | diff --git a/common/functions-v2/tests/it/scalars/testdata/string.txt b/common/functions-v2/tests/it/scalars/testdata/string.txt index 84fa548a7bb08..8b8908f5950d6 100644 --- a/common/functions-v2/tests/it/scalars/testdata/string.txt +++ b/common/functions-v2/tests/it/scalars/testdata/string.txt @@ -2,7 +2,7 @@ ast : upper('Abc') raw expr : upper("Abc") checked expr : upper("Abc") output type : String -output domain : {""..} +output domain : Unknown output : "ABC" @@ -10,7 +10,7 @@ ast : upper('Dobrý den') raw expr : upper("Dobrý den") checked expr : upper("Dobrý den") output type : String -output domain : {""..} +output domain : Unknown output : "DOBRÝ DEN" @@ -18,13 +18,14 @@ ast : upper('ß😀山') raw expr : upper("ß😀山") checked expr : upper("ß😀山") output type : String -output domain : {""..} +output domain : Unknown output : "SS😀山" ast : upper(NULL) raw expr : upper(NULL) checked expr : upper(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -38,7 +39,7 @@ evaluation: | | a | Output | +--------+-------------------+-------------+ | Type | String | String | -| Domain | {"Abc"..="ß😀山"} | {""..} | +| Domain | {"Abc"..="ß😀山"} | Unknown | | Row 0 | "Abc" | "ABC" | | Row 1 | "Dobrý den" | "DOBRÝ DEN" | | Row 2 | "ß😀山" | "SS😀山" | @@ -56,7 +57,7 @@ ast : lower('Abc') raw expr : lower("Abc") checked expr : lower("Abc") output type : String -output domain : {""..} +output domain : Unknown output : "abc" @@ -64,7 +65,7 @@ ast : lower('DOBRÝ DEN') raw expr : lower("DOBRÝ DEN") checked expr : lower("DOBRÝ DEN") output type : String -output domain : {""..} +output domain : Unknown output : "dobrý den" @@ -72,13 +73,14 @@ ast : lower('İ😀山') raw expr : lower("İ😀山") checked expr : lower("İ😀山") output type : String -output domain : {""..} +output domain : Unknown output : "i\u{307}😀山" ast : lower(NULL) raw expr : lower(NULL) checked expr : lower(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -92,7 +94,7 @@ evaluation: | | a | Output | +--------+-------------------+----------------+ | Type | String | String | -| Domain | {"Abc"..="İ😀山"} | {""..} | +| Domain | {"Abc"..="İ😀山"} | Unknown | | Row 0 | "Abc" | "abc" | | Row 1 | "DOBRÝ DEN" | "dobrý den" | | Row 2 | "İ😀山" | "i\u{307}😀山" | @@ -110,13 +112,14 @@ ast : bit_length('latin') raw expr : bit_length("latin") checked expr : bit_length("latin") 
output type : UInt64 -output domain : {0..=18446744073709551615} +output domain : Unknown output : 40 ast : bit_length(NULL) raw expr : bit_length(NULL) checked expr : bit_length(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -126,15 +129,15 @@ ast : bit_length(a) raw expr : bit_length(ColumnRef(0)::String) checked expr : bit_length(ColumnRef(0)) evaluation: -+--------+-----------------------------------+----------------------------+ -| | a | Output | -+--------+-----------------------------------+----------------------------+ -| Type | String | UInt64 | -| Domain | {"latin"..="кириллица and latin"} | {0..=18446744073709551615} | -| Row 0 | "latin" | 40 | -| Row 1 | "кириллица" | 144 | -| Row 2 | "кириллица and latin" | 224 | -+--------+-----------------------------------+----------------------------+ ++--------+-----------------------------------+---------+ +| | a | Output | ++--------+-----------------------------------+---------+ +| Type | String | UInt64 | +| Domain | {"latin"..="кириллица and latin"} | Unknown | +| Row 0 | "latin" | 40 | +| Row 1 | "кириллица" | 144 | +| Row 2 | "кириллица and latin" | 224 | ++--------+-----------------------------------+---------+ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -148,13 +151,14 @@ ast : octet_length('latin') raw expr : octet_length("latin") checked expr : octet_length("latin") output type : UInt64 -output domain : {0..=18446744073709551615} +output domain : Unknown output : 5 ast : octet_length(NULL) raw expr : octet_length(NULL) checked expr : octet_length(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -164,15 +168,15 @@ ast : length(a) raw expr : length(ColumnRef(0)::String) checked expr : octet_length(ColumnRef(0)) evaluation: -+--------+-----------------------------------+----------------------------+ -| | a | Output | -+--------+-----------------------------------+----------------------------+ -| Type | String | UInt64 | -| Domain | {"latin"..="кириллица and latin"} | {0..=18446744073709551615} | -| Row 0 | "latin" | 5 | -| Row 1 | "кириллица" | 18 | -| Row 2 | "кириллица and latin" | 28 | -+--------+-----------------------------------+----------------------------+ ++--------+-----------------------------------+---------+ +| | a | Output | ++--------+-----------------------------------+---------+ +| Type | String | UInt64 | +| Domain | {"latin"..="кириллица and latin"} | Unknown | +| Row 0 | "latin" | 5 | +| Row 1 | "кириллица" | 18 | +| Row 2 | "кириллица and latin" | 28 | ++--------+-----------------------------------+---------+ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -186,13 +190,14 @@ ast : char_length('latin') raw expr : char_length("latin") checked expr : char_length("latin") output type : UInt64 -output domain : {0..=18446744073709551615} +output domain : Unknown output : 5 ast : char_length(NULL) raw expr : 
char_length(NULL) checked expr : char_length(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -202,15 +207,15 @@ ast : character_length(a) raw expr : character_length(ColumnRef(0)::String) checked expr : char_length(ColumnRef(0)) evaluation: -+--------+-----------------------------------+----------------------------+ -| | a | Output | -+--------+-----------------------------------+----------------------------+ -| Type | String | UInt64 | -| Domain | {"latin"..="кириллица and latin"} | {0..=18446744073709551615} | -| Row 0 | "latin" | 5 | -| Row 1 | "кириллица" | 9 | -| Row 2 | "кириллица and latin" | 19 | -+--------+-----------------------------------+----------------------------+ ++--------+-----------------------------------+---------+ +| | a | Output | ++--------+-----------------------------------+---------+ +| Type | String | UInt64 | +| Domain | {"latin"..="кириллица and latin"} | Unknown | +| Row 0 | "latin" | 5 | +| Row 1 | "кириллица" | 9 | +| Row 2 | "кириллица and latin" | 19 | ++--------+-----------------------------------+---------+ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -224,7 +229,7 @@ ast : to_base64('Abc') raw expr : to_base64("Abc") checked expr : to_base64("Abc") output type : String -output domain : {""..} +output domain : Unknown output : "QWJj" @@ -232,13 +237,14 @@ ast : to_base64('123') raw expr : to_base64("123") checked expr : to_base64("123") output type : String -output domain : {""..} +output domain : Unknown output : "MTIz" ast : to_base64(Null) raw expr : to_base64(NULL) checked expr : to_base64(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -248,14 +254,14 @@ ast : to_base64(a) raw expr : to_base64(ColumnRef(0)::String) checked expr : to_base64(ColumnRef(0)) evaluation: -+--------+-----------------+--------+ -| | a | Output | -+--------+-----------------+--------+ -| Type | String | String | -| Domain | {"123"..="Abc"} | {""..} | -| Row 0 | "Abc" | "QWJj" | -| Row 1 | "123" | "MTIz" | -+--------+-----------------+--------+ ++--------+-----------------+---------+ +| | a | Output | ++--------+-----------------+---------+ +| Type | String | String | +| Domain | {"123"..="Abc"} | Unknown | +| Row 0 | "Abc" | "QWJj" | +| Row 1 | "123" | "MTIz" | ++--------+-----------------+---------+ evaluation (internal): +--------+---------------------------------------------------------------------------------------+ | Column | Data | @@ -269,7 +275,7 @@ ast : from_base64('QWJj') raw expr : from_base64("QWJj") checked expr : from_base64("QWJj") output type : String -output domain : {""..} +output domain : Unknown output : "Abc" @@ -277,13 +283,14 @@ ast : from_base64('MTIz') raw expr : from_base64("MTIz") checked expr : from_base64("MTIz") output type : String -output domain : {""..} +output domain : Unknown output : "123" ast : from_base64(Null) raw expr : from_base64(NULL) checked expr : from_base64(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -293,14 +300,14 @@ ast : from_base64(a) raw expr : from_base64(ColumnRef(0)::String) checked expr : from_base64(ColumnRef(0)) evaluation: -+--------+-------------------+--------+ -| | a | Output 
| -+--------+-------------------+--------+ -| Type | String | String | -| Domain | {"MTIz"..="QWJj"} | {""..} | -| Row 0 | "QWJj" | "Abc" | -| Row 1 | "MTIz" | "123" | -+--------+-------------------+--------+ ++--------+-------------------+---------+ +| | a | Output | ++--------+-------------------+---------+ +| Type | String | String | +| Domain | {"MTIz"..="QWJj"} | Unknown | +| Row 0 | "QWJj" | "Abc" | +| Row 1 | "MTIz" | "123" | ++--------+-------------------+---------+ evaluation (internal): +--------+---------------------------------------------------------------------------------------+ | Column | Data | @@ -314,7 +321,7 @@ ast : quote('a\0b') raw expr : quote("a\0b") checked expr : quote("a\0b") output type : String -output domain : {""..} +output domain : Unknown output : "a\\0b" @@ -322,7 +329,7 @@ ast : quote('a\'b') raw expr : quote("a'b") checked expr : quote("a'b") output type : String -output domain : {""..} +output domain : Unknown output : "a\\'b" @@ -330,7 +337,7 @@ ast : quote('a\"b') raw expr : quote("a\"b") checked expr : quote("a\"b") output type : String -output domain : {""..} +output domain : Unknown output : "a\\\"b" @@ -338,7 +345,7 @@ ast : quote('a\bb') raw expr : quote("a\u{8}b") checked expr : quote("a\u{8}b") output type : String -output domain : {""..} +output domain : Unknown output : "a\\bb" @@ -346,7 +353,7 @@ ast : quote('a\nb') raw expr : quote("a\nb") checked expr : quote("a\nb") output type : String -output domain : {""..} +output domain : Unknown output : "a\\nb" @@ -354,7 +361,7 @@ ast : quote('a\rb') raw expr : quote("a\rb") checked expr : quote("a\rb") output type : String -output domain : {""..} +output domain : Unknown output : "a\\rb" @@ -362,7 +369,7 @@ ast : quote('a\tb') raw expr : quote("a\tb") checked expr : quote("a\tb") output type : String -output domain : {""..} +output domain : Unknown output : "a\\tb" @@ -370,7 +377,7 @@ ast : quote('a\\b') raw expr : quote("a\\b") checked expr : quote("a\\b") output type : String -output domain : {""..} +output domain : Unknown output : "a\\\\b" @@ -378,7 +385,7 @@ ast : quote('你好') raw expr : quote("你好") checked expr : quote("你好") output type : String -output domain : {""..} +output domain : Unknown output : "你好" @@ -386,7 +393,7 @@ ast : quote('ß😀山') raw expr : quote("ß😀山") checked expr : quote("ß😀山") output type : String -output domain : {""..} +output domain : Unknown output : "ß😀山" @@ -394,13 +401,14 @@ ast : quote('Dobrý den') raw expr : quote("Dobrý den") checked expr : quote("Dobrý den") output type : String -output domain : {""..} +output domain : Unknown output : "Dobrý den" ast : quote(Null) raw expr : quote(NULL) checked expr : quote(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -414,7 +422,7 @@ evaluation: | | a | Output | +--------+---------------------+-------------+ | Type | String | String | -| Domain | {"a\\'b"..="a\\nb"} | {""..} | +| Domain | {"a\\'b"..="a\\nb"} | Unknown | | Row 0 | "a\\0b" | "a\\\\0b" | | Row 1 | "a\\'b" | "a\\\\\\'b" | | Row 2 | "a\\nb" | "a\\\\nb" | @@ -432,7 +440,7 @@ ast : reverse('abc') raw expr : reverse("abc") checked expr : reverse("abc") output type : String -output domain : {""..} +output domain : Unknown output : "cba" @@ -440,7 +448,7 @@ ast : reverse('a') raw expr : reverse("a") checked expr : reverse("a") output type : String -output domain : {""..} +output domain : Unknown output : "a" @@ -448,7 +456,7 @@ ast : reverse('') raw expr : reverse("") checked expr : reverse("") output type : String -output 
domain : {""..} +output domain : Unknown output : "" @@ -456,7 +464,7 @@ ast : reverse('你好') raw expr : reverse("你好") checked expr : reverse("你好") output type : String -output domain : {""..} +output domain : Unknown output : "��堽�" @@ -464,7 +472,7 @@ ast : reverse('ß😀山') raw expr : reverse("ß😀山") checked expr : reverse("ß😀山") output type : String -output domain : {""..} +output domain : Unknown output : "��倘���" @@ -472,13 +480,14 @@ ast : reverse('Dobrý den') raw expr : reverse("Dobrý den") checked expr : reverse("Dobrý den") output type : String -output domain : {""..} +output domain : Unknown output : "ned ��rboD" ast : reverse(Null) raw expr : reverse(NULL) checked expr : reverse(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -488,15 +497,15 @@ ast : reverse(a) raw expr : reverse(ColumnRef(0)::String) checked expr : reverse(ColumnRef(0)) evaluation: -+--------+--------------+--------+ -| | a | Output | -+--------+--------------+--------+ -| Type | String | String | -| Domain | {""..="abc"} | {""..} | -| Row 0 | "abc" | "cba" | -| Row 1 | "a" | "a" | -| Row 2 | "" | "" | -+--------+--------------+--------+ ++--------+--------------+---------+ +| | a | Output | ++--------+--------------+---------+ +| Type | String | String | +| Domain | {""..="abc"} | Unknown | +| Row 0 | "abc" | "cba" | +| Row 1 | "a" | "a" | +| Row 2 | "" | "" | ++--------+--------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------+ | Column | Data | @@ -509,6 +518,7 @@ evaluation (internal): ast : ascii('1') raw expr : ascii("1") checked expr : ascii("1") +optimized expr : 49_u8 output type : UInt8 output domain : {49..=49} output : 49 @@ -517,6 +527,7 @@ output : 49 ast : ascii('123') raw expr : ascii("123") checked expr : ascii("123") +optimized expr : 49_u8 output type : UInt8 output domain : {49..=49} output : 49 @@ -525,6 +536,7 @@ output : 49 ast : ascii('-1') raw expr : ascii("-1") checked expr : ascii("-1") +optimized expr : 45_u8 output type : UInt8 output domain : {45..=45} output : 45 @@ -533,6 +545,7 @@ output : 45 ast : ascii('') raw expr : ascii("") checked expr : ascii("") +optimized expr : 0_u8 output type : UInt8 output domain : {0..=0} output : 0 @@ -541,6 +554,7 @@ output : 0 ast : ascii('你好') raw expr : ascii("你好") checked expr : ascii("你好") +optimized expr : 228_u8 output type : UInt8 output domain : {228..=228} output : 228 @@ -549,6 +563,7 @@ output : 228 ast : ascii('😀123') raw expr : ascii("😀123") checked expr : ascii("😀123") +optimized expr : 240_u8 output type : UInt8 output domain : {240..=240} output : 240 @@ -557,6 +572,7 @@ output : 240 ast : ascii(Null) raw expr : ascii(NULL) checked expr : ascii(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -588,6 +604,7 @@ evaluation (internal): ast : ascii(b) raw expr : ascii(ColumnRef(0)::String) checked expr : ascii(ColumnRef(0)) +optimized expr : 0_u8 evaluation: +--------+-----------+---------+ | | b | Output | From e128781f12c4b42f0ee44f57e6640a4a594cc3ff Mon Sep 17 00:00:00 2001 From: andylokandy Date: Tue, 9 Aug 2022 23:52:59 +0800 Subject: [PATCH 26/59] update test --- .../tests/it/scalars/testdata/string.txt | 381 +++++++++--------- 1 file changed, 198 insertions(+), 183 deletions(-) diff --git a/common/functions-v2/tests/it/scalars/testdata/string.txt b/common/functions-v2/tests/it/scalars/testdata/string.txt index 8b8908f5950d6..d3f6c0c4051b0 100644 --- 
a/common/functions-v2/tests/it/scalars/testdata/string.txt +++ b/common/functions-v2/tests/it/scalars/testdata/string.txt @@ -626,7 +626,7 @@ ast : ltrim(' abc ') raw expr : ltrim(" abc ") checked expr : ltrim(" abc ") output type : String -output domain : {""..} +output domain : Unknown output : "abc " @@ -634,13 +634,14 @@ ast : ltrim(' ') raw expr : ltrim(" ") checked expr : ltrim(" ") output type : String -output domain : {""..} +output domain : Unknown output : "" ast : ltrim(NULL) raw expr : ltrim(NULL) checked expr : ltrim(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -654,7 +655,7 @@ evaluation: | | a | Output | +--------+-----------------------+----------+ | Type | String | String | -| Domain | {" abc"..="abc "} | {""..} | +| Domain | {" abc"..="abc "} | Unknown | | Row 0 | "abc" | "abc" | | Row 1 | " abc" | "abc" | | Row 2 | " abc " | "abc " | @@ -673,7 +674,7 @@ ast : rtrim(' abc ') raw expr : rtrim(" abc ") checked expr : rtrim(" abc ") output type : String -output domain : {""..} +output domain : Unknown output : " abc" @@ -681,13 +682,14 @@ ast : rtrim(' ') raw expr : rtrim(" ") checked expr : rtrim(" ") output type : String -output domain : {""..} +output domain : Unknown output : "" ast : rtrim(NULL) raw expr : rtrim(NULL) checked expr : rtrim(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -701,7 +703,7 @@ evaluation: | | a | Output | +--------+-----------------------+----------+ | Type | String | String | -| Domain | {" abc"..="abc "} | {""..} | +| Domain | {" abc"..="abc "} | Unknown | | Row 0 | "abc" | "abc" | | Row 1 | " abc" | " abc" | | Row 2 | " abc " | " abc" | @@ -720,7 +722,7 @@ ast : trim_leading('aaabbaaa', 'a') raw expr : trim_leading("aaabbaaa", "a") checked expr : trim_leading("aaabbaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "bbaaa" @@ -728,7 +730,7 @@ ast : trim_leading('aaabbaaa', 'aa') raw expr : trim_leading("aaabbaaa", "aa") checked expr : trim_leading("aaabbaaa", "aa") output type : String -output domain : {""..} +output domain : Unknown output : "abbaaa" @@ -736,7 +738,7 @@ ast : trim_leading('aaaaaaaa', 'a') raw expr : trim_leading("aaaaaaaa", "a") checked expr : trim_leading("aaaaaaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "" @@ -744,13 +746,14 @@ ast : trim_leading('aaabbaaa', 'b') raw expr : trim_leading("aaabbaaa", "b") checked expr : trim_leading("aaabbaaa", "b") output type : String -output domain : {""..} +output domain : Unknown output : "aaabbaaa" ast : trim_leading(NULL, 'a') raw expr : trim_leading(NULL, "a") checked expr : trim_leading(NULL, CAST("a" AS String NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -759,6 +762,7 @@ output : NULL ast : trim_leading('aaaaaaaa', NULL) raw expr : trim_leading("aaaaaaaa", NULL) checked expr : trim_leading(CAST("aaaaaaaa" AS String NULL), NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -772,7 +776,7 @@ evaluation: | | a | b | Output | +--------+-----------------------+-------------+----------+ | Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | | Row 0 | "aabbaa" | "a" | "bbaa" | | Row 1 | "bbccbb" | "b" | "bbccbb" | | Row 2 | "ccddcc" | "c" | "ccddcc" | @@ -791,15 +795,15 @@ ast : trim_leading(a, b) raw expr : trim_leading(ColumnRef(0)::String, 
ColumnRef(1)::String) checked expr : trim_leading(ColumnRef(0), ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "bbaa" | -| Row 1 | "bbccbb" | "b" | "ccbb" | -| Row 2 | "ccddcc" | "c" | "ddcc" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "bbaa" | +| Row 1 | "bbccbb" | "b" | "ccbb" | +| Row 2 | "ccddcc" | "c" | "ddcc" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -814,15 +818,15 @@ ast : trim_leading('aba', b) raw expr : trim_leading("aba", ColumnRef(1)::String) checked expr : trim_leading("aba", ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "ba" | -| Row 1 | "bbccbb" | "b" | "aba" | -| Row 2 | "ccddcc" | "c" | "aba" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "ba" | +| Row 1 | "bbccbb" | "b" | "aba" | +| Row 2 | "ccddcc" | "c" | "aba" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -837,7 +841,7 @@ ast : trim_trailing('aaabbaaa', 'a') raw expr : trim_trailing("aaabbaaa", "a") checked expr : trim_trailing("aaabbaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "aaabb" @@ -845,7 +849,7 @@ ast : trim_trailing('aaabbaaa', 'aa') raw expr : trim_trailing("aaabbaaa", "aa") checked expr : trim_trailing("aaabbaaa", "aa") output type : String -output domain : {""..} +output domain : Unknown output : "aaabba" @@ -853,7 +857,7 @@ ast : trim_trailing('aaaaaaaa', 'a') raw expr : trim_trailing("aaaaaaaa", "a") checked expr : trim_trailing("aaaaaaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "" @@ -861,13 +865,14 @@ ast : trim_trailing('aaabbaaa', 'b') raw expr : trim_trailing("aaabbaaa", "b") checked expr : trim_trailing("aaabbaaa", "b") output type : String -output domain : {""..} +output domain : Unknown output : "aaabbaaa" ast : trim_trailing(NULL, 'a') raw expr : trim_trailing(NULL, "a") checked expr : trim_trailing(NULL, CAST("a" AS String NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -876,6 +881,7 @@ output : NULL ast : trim_trailing('aaaaaaaa', NULL) raw expr : trim_trailing("aaaaaaaa", NULL) checked expr : trim_trailing(CAST("aaaaaaaa" AS 
String NULL), NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -889,7 +895,7 @@ evaluation: | | a | b | Output | +--------+-----------------------+-------------+----------+ | Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | | Row 0 | "aabbaa" | "a" | "aabbaa" | | Row 1 | "bbccbb" | "b" | "bbcc" | | Row 2 | "ccddcc" | "c" | "ccddcc" | @@ -908,15 +914,15 @@ ast : trim_trailing(a, b) raw expr : trim_trailing(ColumnRef(0)::String, ColumnRef(1)::String) checked expr : trim_trailing(ColumnRef(0), ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "aabb" | -| Row 1 | "bbccbb" | "b" | "bbcc" | -| Row 2 | "ccddcc" | "c" | "ccdd" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "aabb" | +| Row 1 | "bbccbb" | "b" | "bbcc" | +| Row 2 | "ccddcc" | "c" | "ccdd" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -931,15 +937,15 @@ ast : trim_trailing('aba', b) raw expr : trim_trailing("aba", ColumnRef(1)::String) checked expr : trim_trailing("aba", ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "ab" | -| Row 1 | "bbccbb" | "b" | "aba" | -| Row 2 | "ccddcc" | "c" | "aba" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "ab" | +| Row 1 | "bbccbb" | "b" | "aba" | +| Row 2 | "ccddcc" | "c" | "aba" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -954,7 +960,7 @@ ast : trim_both('aaabbaaa', 'a') raw expr : trim_both("aaabbaaa", "a") checked expr : trim_both("aaabbaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "bb" @@ -962,7 +968,7 @@ ast : trim_both('aaabbaaa', 'aa') raw expr : trim_both("aaabbaaa", "aa") checked expr : trim_both("aaabbaaa", "aa") output type : String -output domain : {""..} +output domain : Unknown output : "abba" @@ -970,7 +976,7 @@ ast : trim_both('aaaaaaaa', 'a') raw expr : trim_both("aaaaaaaa", "a") checked expr : trim_both("aaaaaaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "" @@ -978,13 +984,14 @@ ast : trim_both('aaabbaaa', 'b') raw expr : 
trim_both("aaabbaaa", "b") checked expr : trim_both("aaabbaaa", "b") output type : String -output domain : {""..} +output domain : Unknown output : "aaabbaaa" ast : trim_both(NULL, 'a') raw expr : trim_both(NULL, "a") checked expr : trim_both(NULL, CAST("a" AS String NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -993,6 +1000,7 @@ output : NULL ast : trim_both('aaaaaaaa', NULL) raw expr : trim_both("aaaaaaaa", NULL) checked expr : trim_both(CAST("aaaaaaaa" AS String NULL), NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -1006,7 +1014,7 @@ evaluation: | | a | b | Output | +--------+-----------------------+-------------+----------+ | Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | | Row 0 | "aabbaa" | "a" | "bb" | | Row 1 | "bbccbb" | "b" | "bbccbb" | | Row 2 | "ccddcc" | "c" | "ccddcc" | @@ -1025,15 +1033,15 @@ ast : trim_both(a, b) raw expr : trim_both(ColumnRef(0)::String, ColumnRef(1)::String) checked expr : trim_both(ColumnRef(0), ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "bb" | -| Row 1 | "bbccbb" | "b" | "cc" | -| Row 2 | "ccddcc" | "c" | "dd" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "bb" | +| Row 1 | "bbccbb" | "b" | "cc" | +| Row 2 | "ccddcc" | "c" | "dd" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1048,15 +1056,15 @@ ast : trim_both('aba', b) raw expr : trim_both("aba", ColumnRef(1)::String) checked expr : trim_both("aba", ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "b" | -| Row 1 | "bbccbb" | "b" | "aba" | -| Row 2 | "ccddcc" | "c" | "aba" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "b" | +| Row 1 | "bbccbb" | "b" | "aba" | +| Row 2 | "ccddcc" | "c" | "aba" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1071,7 +1079,7 @@ ast : trim(' abc ') raw expr : trim(" abc ") checked expr : trim(" abc ") output type : String -output domain : {""..} +output domain : Unknown output : "abc" @@ -1079,13 +1087,14 @@ ast : trim(' ') raw expr : trim(" ") checked expr 
: trim(" ") output type : String -output domain : {""..} +output domain : Unknown output : "" ast : trim(NULL) raw expr : trim(NULL) checked expr : trim(NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -1095,16 +1104,16 @@ ast : trim(a) raw expr : trim(ColumnRef(0)::String) checked expr : trim(ColumnRef(0)) evaluation: -+--------+-----------------------+--------+ -| | a | Output | -+--------+-----------------------+--------+ -| Type | String | String | -| Domain | {" abc"..="abc "} | {""..} | -| Row 0 | "abc" | "abc" | -| Row 1 | " abc" | "abc" | -| Row 2 | " abc " | "abc" | -| Row 3 | "abc " | "abc" | -+--------+-----------------------+--------+ ++--------+-----------------------+---------+ +| | a | Output | ++--------+-----------------------+---------+ +| Type | String | String | +| Domain | {" abc"..="abc "} | Unknown | +| Row 0 | "abc" | "abc" | +| Row 1 | " abc" | "abc" | +| Row 2 | " abc " | "abc" | +| Row 3 | "abc " | "abc" | ++--------+-----------------------+---------+ evaluation (internal): +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1118,7 +1127,7 @@ ast : trim(both 'a' from 'aaabbaaa') raw expr : trim_both("aaabbaaa", "a") checked expr : trim_both("aaabbaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "bb" @@ -1126,7 +1135,7 @@ ast : trim(both 'aa' from 'aaabbaaa') raw expr : trim_both("aaabbaaa", "aa") checked expr : trim_both("aaabbaaa", "aa") output type : String -output domain : {""..} +output domain : Unknown output : "abba" @@ -1134,7 +1143,7 @@ ast : trim(both 'a' from 'aaaaaaaa') raw expr : trim_both("aaaaaaaa", "a") checked expr : trim_both("aaaaaaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "" @@ -1142,13 +1151,14 @@ ast : trim(both 'b' from 'aaabbaaa') raw expr : trim_both("aaabbaaa", "b") checked expr : trim_both("aaabbaaa", "b") output type : String -output domain : {""..} +output domain : Unknown output : "aaabbaaa" ast : trim(both 'a' from NULL) raw expr : trim_both(NULL, "a") checked expr : trim_both(NULL, CAST("a" AS String NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -1157,6 +1167,7 @@ output : NULL ast : trim(both NULL from 'aaaaaaaa') raw expr : trim_both("aaaaaaaa", NULL) checked expr : trim_both(CAST("aaaaaaaa" AS String NULL), NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -1170,7 +1181,7 @@ evaluation: | | a | b | Output | +--------+-----------------------+-------------+----------+ | Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | | Row 0 | "aabbaa" | "a" | "bb" | | Row 1 | "bbccbb" | "b" | "bbccbb" | | Row 2 | "ccddcc" | "c" | "ccddcc" | @@ -1189,15 +1200,15 @@ ast : trim(both b from a) raw expr : trim_both(ColumnRef(0)::String, ColumnRef(1)::String) checked expr : trim_both(ColumnRef(0), ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "bb" | -| Row 1 | "bbccbb" | "b" | "cc" | -| Row 2 | "ccddcc" | "c" | "dd" | 
-+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "bb" | +| Row 1 | "bbccbb" | "b" | "cc" | +| Row 2 | "ccddcc" | "c" | "dd" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1212,15 +1223,15 @@ ast : trim(both a from a) raw expr : trim_both(ColumnRef(0)::String, ColumnRef(0)::String) checked expr : trim_both(ColumnRef(0), ColumnRef(0)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "" | -| Row 1 | "bbccbb" | "b" | "" | -| Row 2 | "ccddcc" | "c" | "" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "" | +| Row 1 | "bbccbb" | "b" | "" | +| Row 2 | "ccddcc" | "c" | "" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1235,15 +1246,15 @@ ast : trim(both b from 'aba') raw expr : trim_both("aba", ColumnRef(1)::String) checked expr : trim_both("aba", ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "b" | -| Row 1 | "bbccbb" | "b" | "aba" | -| Row 2 | "ccddcc" | "c" | "aba" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "b" | +| Row 1 | "bbccbb" | "b" | "aba" | +| Row 2 | "ccddcc" | "c" | "aba" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1258,7 +1269,7 @@ ast : trim(leading 'a' from 'aaabbaaa') raw expr : trim_leading("aaabbaaa", "a") checked expr : trim_leading("aaabbaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "bbaaa" @@ -1266,7 +1277,7 @@ ast : trim(leading 'aa' from 'aaabbaaa') raw expr : trim_leading("aaabbaaa", "aa") checked expr : trim_leading("aaabbaaa", "aa") output type : String -output domain : {""..} +output domain : Unknown output : "abbaaa" @@ -1274,7 +1285,7 @@ ast : trim(leading 'a' from 'aaaaaaaa') raw expr : trim_leading("aaaaaaaa", "a") checked expr : 
trim_leading("aaaaaaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "" @@ -1282,13 +1293,14 @@ ast : trim(leading 'b' from 'aaabbaaa') raw expr : trim_leading("aaabbaaa", "b") checked expr : trim_leading("aaabbaaa", "b") output type : String -output domain : {""..} +output domain : Unknown output : "aaabbaaa" ast : trim(leading 'a' from NULL) raw expr : trim_leading(NULL, "a") checked expr : trim_leading(NULL, CAST("a" AS String NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -1297,6 +1309,7 @@ output : NULL ast : trim(leading NULL from 'aaaaaaaa') raw expr : trim_leading("aaaaaaaa", NULL) checked expr : trim_leading(CAST("aaaaaaaa" AS String NULL), NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -1310,7 +1323,7 @@ evaluation: | | a | b | Output | +--------+-----------------------+-------------+----------+ | Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | | Row 0 | "aabbaa" | "a" | "bbaa" | | Row 1 | "bbccbb" | "b" | "bbccbb" | | Row 2 | "ccddcc" | "c" | "ccddcc" | @@ -1329,15 +1342,15 @@ ast : trim(leading b from a) raw expr : trim_leading(ColumnRef(0)::String, ColumnRef(1)::String) checked expr : trim_leading(ColumnRef(0), ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "bbaa" | -| Row 1 | "bbccbb" | "b" | "ccbb" | -| Row 2 | "ccddcc" | "c" | "ddcc" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "bbaa" | +| Row 1 | "bbccbb" | "b" | "ccbb" | +| Row 2 | "ccddcc" | "c" | "ddcc" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1352,15 +1365,15 @@ ast : trim(leading a from a) raw expr : trim_leading(ColumnRef(0)::String, ColumnRef(0)::String) checked expr : trim_leading(ColumnRef(0), ColumnRef(0)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "" | -| Row 1 | "bbccbb" | "b" | "" | -| Row 2 | "ccddcc" | "c" | "" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "" | +| Row 1 | "bbccbb" | "b" | "" | +| Row 2 | "ccddcc" | "c" | "" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column 
| Data | @@ -1375,15 +1388,15 @@ ast : trim(leading b from 'aba') raw expr : trim_leading("aba", ColumnRef(1)::String) checked expr : trim_leading("aba", ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "ba" | -| Row 1 | "bbccbb" | "b" | "aba" | -| Row 2 | "ccddcc" | "c" | "aba" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "ba" | +| Row 1 | "bbccbb" | "b" | "aba" | +| Row 2 | "ccddcc" | "c" | "aba" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1398,7 +1411,7 @@ ast : trim(trailing 'a' from 'aaabbaaa') raw expr : trim_trailing("aaabbaaa", "a") checked expr : trim_trailing("aaabbaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "aaabb" @@ -1406,7 +1419,7 @@ ast : trim(trailing 'aa' from 'aaabbaaa') raw expr : trim_trailing("aaabbaaa", "aa") checked expr : trim_trailing("aaabbaaa", "aa") output type : String -output domain : {""..} +output domain : Unknown output : "aaabba" @@ -1414,7 +1427,7 @@ ast : trim(trailing 'a' from 'aaaaaaaa') raw expr : trim_trailing("aaaaaaaa", "a") checked expr : trim_trailing("aaaaaaaa", "a") output type : String -output domain : {""..} +output domain : Unknown output : "" @@ -1422,13 +1435,14 @@ ast : trim(trailing 'b' from 'aaabbaaa') raw expr : trim_trailing("aaabbaaa", "b") checked expr : trim_trailing("aaabbaaa", "b") output type : String -output domain : {""..} +output domain : Unknown output : "aaabbaaa" ast : trim(trailing 'a' from NULL) raw expr : trim_trailing(NULL, "a") checked expr : trim_trailing(NULL, CAST("a" AS String NULL)) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -1437,6 +1451,7 @@ output : NULL ast : trim(trailing NULL from 'aaaaaaaa') raw expr : trim_trailing("aaaaaaaa", NULL) checked expr : trim_trailing(CAST("aaaaaaaa" AS String NULL), NULL) +optimized expr : NULL output type : NULL output domain : {NULL} output : NULL @@ -1450,7 +1465,7 @@ evaluation: | | a | b | Output | +--------+-----------------------+-------------+----------+ | Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | | Row 0 | "aabbaa" | "a" | "aabb" | | Row 1 | "bbccbb" | "b" | "bbccbb" | | Row 2 | "ccddcc" | "c" | "ccddcc" | @@ -1469,15 +1484,15 @@ ast : trim(trailing b from a) raw expr : trim_trailing(ColumnRef(0)::String, ColumnRef(1)::String) checked expr : trim_trailing(ColumnRef(0), ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "aabb" | -| Row 1 | "bbccbb" | "b" | "bbcc" | -| Row 2 | "ccddcc" | "c" | "ccdd" | 
-+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "aabb" | +| Row 1 | "bbccbb" | "b" | "bbcc" | +| Row 2 | "ccddcc" | "c" | "ccdd" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1492,15 +1507,15 @@ ast : trim(trailing a from a) raw expr : trim_trailing(ColumnRef(0)::String, ColumnRef(0)::String) checked expr : trim_trailing(ColumnRef(0), ColumnRef(0)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "" | -| Row 1 | "bbccbb" | "b" | "" | -| Row 2 | "ccddcc" | "c" | "" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "" | +| Row 1 | "bbccbb" | "b" | "" | +| Row 2 | "ccddcc" | "c" | "" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -1515,15 +1530,15 @@ ast : trim(trailing b from 'aba') raw expr : trim_trailing("aba", ColumnRef(1)::String) checked expr : trim_trailing("aba", ColumnRef(1)) evaluation: -+--------+-----------------------+-------------+--------+ -| | a | b | Output | -+--------+-----------------------+-------------+--------+ -| Type | String | String | String | -| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | {""..} | -| Row 0 | "aabbaa" | "a" | "ab" | -| Row 1 | "bbccbb" | "b" | "aba" | -| Row 2 | "ccddcc" | "c" | "aba" | -+--------+-----------------------+-------------+--------+ ++--------+-----------------------+-------------+---------+ +| | a | b | Output | ++--------+-----------------------+-------------+---------+ +| Type | String | String | String | +| Domain | {"aabbaa"..="ccddcc"} | {"a"..="c"} | Unknown | +| Row 0 | "aabbaa" | "a" | "ab" | +| Row 1 | "bbccbb" | "b" | "aba" | +| Row 2 | "ccddcc" | "c" | "aba" | ++--------+-----------------------+-------------+---------+ evaluation (internal): +--------+------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | From ead1c0c8e7f78007d352af6512766f594584ac7b Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 00:15:51 +0800 Subject: [PATCH 27/59] fix clippy --- common/expression/tests/it/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/expression/tests/it/main.rs b/common/expression/tests/it/main.rs index b5bb3e9ef2a57..ea6c79ec48be7 100644 --- a/common/expression/tests/it/main.rs +++ b/common/expression/tests/it/main.rs @@ -991,7 +991,7 @@ fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Column 
output_domain .as_ref() .map(ToString::to_string) - .unwrap_or("Unknown".to_string()), + .unwrap_or_else(|| "Unknown".to_string()), result?, ) }; From 607b58ff29f1a1e9fba612578cfe1633ba94bcde Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 00:19:46 +0800 Subject: [PATCH 28/59] remove unused comment --- common/expression/src/values.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/common/expression/src/values.rs b/common/expression/src/values.rs index 30fc48c879a04..e31da0ec26002 100755 --- a/common/expression/src/values.rs +++ b/common/expression/src/values.rs @@ -159,13 +159,7 @@ impl<'a, T: ValueType> ValueRef<'a, T> { (ValueRef::Column(c1), ValueRef::Column(c2)) => c1 == c2, (ValueRef::Scalar(s), ValueRef::Column(c)) | (ValueRef::Column(c), ValueRef::Scalar(s)) => { - for scalar in T::iter_column(c) { - if scalar != *s { - return false; - } - } - true - // T::iter_column(c).all(|scalar| &scalar == s) + T::iter_column(c).all(|scalar| scalar == *s) } } } From 0169bc376d02bd616da7ca5effc968a8181f2f9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?= Date: Tue, 9 Aug 2022 23:47:09 +0800 Subject: [PATCH 29/59] refactor(meta-api): introduce Id: record identifier used in KVApi --- common/meta/api/src/id.rs | 43 ++++++++++++++++++++++++++ common/meta/api/src/kv_api_utils.rs | 17 +++++----- common/meta/api/src/lib.rs | 2 ++ common/meta/api/src/schema_api_impl.rs | 2 +- common/meta/api/src/schema_api_keys.rs | 4 +-- common/meta/api/src/share_api_keys.rs | 2 +- 6 files changed, 58 insertions(+), 12 deletions(-) create mode 100644 common/meta/api/src/id.rs diff --git a/common/meta/api/src/id.rs b/common/meta/api/src/id.rs new file mode 100644 index 0000000000000..dacb2aba3d2f3 --- /dev/null +++ b/common/meta/api/src/id.rs @@ -0,0 +1,43 @@ +// Copyright 2021 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::ops::Deref; + +/// The identifier of a internal record used in an application upon KVApi. +/// +/// E.g. TableId, DatabaseId. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Id(pub u64); + +impl Id { + pub fn new(i: u64) -> Self { + Id(i) + } +} + +/// Convert primitive u64 to Id. 
+impl From for Id { + fn from(i: u64) -> Self { + Id(i) + } +} + +/// Use `Id` as if using a `u64` +impl Deref for Id { + type Target = u64; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/common/meta/api/src/kv_api_utils.rs b/common/meta/api/src/kv_api_utils.rs index ea1b8a4b85660..04ed1bce54595 100644 --- a/common/meta/api/src/kv_api_utils.rs +++ b/common/meta/api/src/kv_api_utils.rs @@ -36,6 +36,7 @@ use common_meta_types::UpsertKVReq; use common_proto_conv::FromToProto; use tracing::debug; +use crate::Id; use crate::KVApi; use crate::KVApiKey; @@ -56,7 +57,7 @@ pub async fn get_u64_value( let res = kv_api.get_kv(&key.to_key()).await?; if let Some(seq_v) = res { - Ok((seq_v.seq, deserialize_u64(&seq_v.data)?)) + Ok((seq_v.seq, *deserialize_u64(&seq_v.data)?)) } else { Ok((0, 0)) } @@ -83,9 +84,14 @@ where } } -pub fn deserialize_u64(v: &[u8]) -> Result { +pub fn serialize_u64(value: impl Into) -> Result, MetaError> { + let v = serde_json::to_vec(&*value.into()).map_err(meta_encode_err)?; + Ok(v) +} + +pub fn deserialize_u64(v: &[u8]) -> Result { let id = serde_json::from_slice(v).map_err(meta_encode_err)?; - Ok(id) + Ok(Id::new(id)) } /// Generate an id on metasrv. @@ -107,11 +113,6 @@ pub async fn fetch_id(kv_api: &impl KVApi, generator: T) -> Result< Ok(seq_v.seq) } -pub fn serialize_u64(value: u64) -> Result, MetaError> { - let v = serde_json::to_vec(&value).map_err(meta_encode_err)?; - Ok(v) -} - pub fn serialize_struct(value: &T) -> Result, MetaError> where T: FromToProto + 'static, diff --git a/common/meta/api/src/lib.rs b/common/meta/api/src/lib.rs index 4384b214187d2..c9957e9c29ce9 100644 --- a/common/meta/api/src/lib.rs +++ b/common/meta/api/src/lib.rs @@ -15,6 +15,7 @@ #![deny(unused_crate_dependencies)] extern crate common_meta_types; +mod id; mod kv_api; mod kv_api_key; mod kv_api_test_suite; @@ -28,6 +29,7 @@ mod share_api_impl; mod share_api_keys; mod share_api_test_suite; +pub use id::Id; pub use kv_api::get_start_and_end_of_prefix; pub use kv_api::prefix_of_string; pub use kv_api::ApiBuilder; diff --git a/common/meta/api/src/schema_api_impl.rs b/common/meta/api/src/schema_api_impl.rs index 031b7d8a6609f..1a35c72a8ff2c 100644 --- a/common/meta/api/src/schema_api_impl.rs +++ b/common/meta/api/src/schema_api_impl.rs @@ -2116,7 +2116,7 @@ async fn list_u64_value( let mut values = Vec::with_capacity(n); for (str_key, seqv) in res.iter() { - let id = deserialize_u64(&seqv.data).map_err(meta_encode_err)?; + let id = *deserialize_u64(&seqv.data).map_err(meta_encode_err)?; values.push(id); // Parse key and get db_name: diff --git a/common/meta/api/src/schema_api_keys.rs b/common/meta/api/src/schema_api_keys.rs index 958113ec0029a..0d9c116c4090c 100644 --- a/common/meta/api/src/schema_api_keys.rs +++ b/common/meta/api/src/schema_api_keys.rs @@ -48,11 +48,11 @@ const PREFIX_DATABASE_ID_TO_NAME: &str = "__fd_database_id_to_name"; const PREFIX_TABLE_ID_TO_NAME: &str = "__fd_table_id_to_name"; /// Key for database id generator -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct DatabaseIdGen {} /// Key for table id generator -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct TableIdGen {} /// __fd_database// -> diff --git a/common/meta/api/src/share_api_keys.rs b/common/meta/api/src/share_api_keys.rs index b2dcc73f233f4..f3ff535b877c8 100644 --- a/common/meta/api/src/share_api_keys.rs +++ b/common/meta/api/src/share_api_keys.rs @@ -36,7 +36,7 @@ const PREFIX_SHARE_ID_TO_NAME: &str = "__fd_share_id_to_name"; const PREFIX_SHARE_ACCOUNT_ID: &str = 
"__fd_share_account_id"; /// Key for share id generator -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ShareIdGen {} impl KVApiKey for ShareIdGen { From 79a32d22b1a83723f9a4dcc810a1024864f986a6 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 10 Aug 2022 09:15:41 +0800 Subject: [PATCH 30/59] feat(query): address comments --- common/expression/src/kernels/concat.rs | 47 ++--- common/expression/src/kernels/filter.rs | 1 - common/expression/src/kernels/scatter.rs | 25 ++- common/expression/src/kernels/take.rs | 59 +++--- common/expression/src/types.rs | 17 +- common/expression/src/types/boolean.rs | 11 +- common/expression/src/types/empty_array.rs | 7 + common/expression/src/types/null.rs | 7 + common/expression/src/types/nullable.rs | 7 +- common/expression/src/types/number.rs | 11 +- common/expression/src/types/string.rs | 4 - common/expression/tests/it/kernel.rs | 16 +- .../tests/it/testdata/kernel-pass.txt | 175 +++++++++--------- 13 files changed, 186 insertions(+), 201 deletions(-) diff --git a/common/expression/src/kernels/concat.rs b/common/expression/src/kernels/concat.rs index abae1d8ffb04e..25f67494677dd 100644 --- a/common/expression/src/kernels/concat.rs +++ b/common/expression/src/kernels/concat.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_arrow::arrow::bitmap::MutableBitmap; -use common_arrow::arrow::buffer::Buffer; use common_exception::ErrorCode; use common_exception::Result; @@ -21,6 +19,7 @@ use crate::types::array::ArrayColumnBuilder; use crate::types::nullable::NullableColumn; use crate::types::string::StringColumnBuilder; use crate::types::AnyType; +use crate::types::ArgType; use crate::types::ArrayType; use crate::types::BooleanType; use crate::types::EmptyArrayType; @@ -73,36 +72,28 @@ impl Column { } let capacity = columns.iter().map(|c| c.len()).sum(); - with_number_mapped_type!(SRC_TYPE, match &columns[0] { - Column::SRC_TYPE(_) => { - let mut values = Vec::with_capacity(columns.len()); - for c in columns.iter() { - let value = NumberType::::try_downcast_column(c).unwrap(); - values.push(value.clone()); - } - NumberType::::upcast_column(Self::concat_primitive_types(&values)) + with_number_mapped_type!(NUM_TYPE, match &columns[0] { + Column::NUM_TYPE(_) => { + Self::concat_arg_types::>(columns) } Column::Null { .. } => { - let builder: usize = 0; - Self::concat_scalar_types::(builder, columns) + Self::concat_arg_types::(columns) } Column::EmptyArray { .. 
} => { - let builder: usize = 0; - Self::concat_scalar_types::(builder, columns) + Self::concat_arg_types::(columns) } Column::Boolean(_) => { - let builder = MutableBitmap::with_capacity(capacity); - Self::concat_scalar_types::(builder, columns) + Self::concat_arg_types::(columns) } Column::String(_) => { let data_capacity = columns.iter().map(|c| c.memory_size() - c.len() * 8).sum(); let builder = StringColumnBuilder::with_capacity(capacity, data_capacity); - Self::concat_scalar_types::(builder, columns) + Self::concat_value_types::(builder, columns) } Column::Array(col) => { let mut builder = ArrayColumnBuilder::::from_column(col.slice(0..0)); builder.reserve(capacity); - Self::concat_scalar_types::>(builder, columns) + Self::concat_value_types::>(builder, columns) } Column::Nullable(_) => { let mut bitmaps = Vec::with_capacity(columns.len()); @@ -114,8 +105,7 @@ impl Column { } let column = Self::concat(&inners); - let validity_builder = MutableBitmap::with_capacity(capacity); - let validity = Self::concat_scalar_types::(validity_builder, &bitmaps); + let validity = Self::concat_arg_types::(&bitmaps); let validity = BooleanType::try_downcast_column(&validity).unwrap(); Column::Nullable(Box::new(NullableColumn { column, validity })) @@ -138,16 +128,17 @@ impl Column { }) } - fn concat_primitive_types(values: &[Buffer]) -> Buffer { - let capacity = values.iter().map(|c| c.len()).sum(); - let mut results = Vec::with_capacity(capacity); - for value in values { - results.extend_from_slice(value.as_slice()); - } - results.into() + fn concat_arg_types(columns: &[Column]) -> Column { + let columns: Vec = columns + .iter() + .map(|c| T::try_downcast_column(c).unwrap()) + .collect(); + let iter = columns.iter().flat_map(|c| T::iter_column(c)); + let result = T::column_from_ref_iter(iter, &[]); + T::upcast_column(result) } - fn concat_scalar_types( + fn concat_value_types( mut builder: T::ColumnBuilder, columns: &[Column], ) -> Column { diff --git a/common/expression/src/kernels/filter.rs b/common/expression/src/kernels/filter.rs index 252bac6c71c41..5ba8fa39f5224 100644 --- a/common/expression/src/kernels/filter.rs +++ b/common/expression/src/kernels/filter.rs @@ -113,7 +113,6 @@ impl Column { Column::SRC_TYPE(Self::filter_primitive_types(values, filter)) } Column::Null { .. } | Column::EmptyArray { .. 
} => self.slice(0..length), - Column::Boolean(bm) => Self::filter_scalar_types::( bm, MutableBitmap::with_capacity(length), diff --git a/common/expression/src/kernels/scatter.rs b/common/expression/src/kernels/scatter.rs index 2442ca985b155..8abd868a1ca04 100644 --- a/common/expression/src/kernels/scatter.rs +++ b/common/expression/src/kernels/scatter.rs @@ -54,7 +54,7 @@ impl Chunk { let mut scattered_chunks = Vec::with_capacity(scatter_size); for index in 0..scatter_size { - let mut chunk_columns = vec![]; + let mut chunk_columns = Vec::with_capacity(scattered_columns.len()); let mut size = 0; for item in scattered_columns.iter() { size = item[index].len(); @@ -84,8 +84,8 @@ impl Column { pub fn scatter(&self, indices: &[I], scatter_size: usize) -> Vec { let length = indices.len(); - with_number_mapped_type!(SRC_TYPE, match self { - Column::SRC_TYPE(values) => Self::scatter_scalars::, _>( + with_number_mapped_type!(NUM_TYPE, match self { + Column::NUM_TYPE(values) => Self::scatter_scalars::, _>( values, Vec::with_capacity(length), indices, @@ -130,7 +130,7 @@ impl Column { ); columns .iter() - .zip(validitys.iter()) + .zip(&validitys) .map(|(column, validity)| { Column::Nullable(Box::new(NullableColumn { column: column.clone(), @@ -140,23 +140,20 @@ impl Column { .collect() } Column::Tuple { fields, .. } => { - let fields_vs: Vec> = fields + let mut fields_vs: Vec> = fields .iter() .map(|c| c.scatter(indices, scatter_size)) .collect(); (0..scatter_size) .map(|index| { - let mut columns = Vec::with_capacity(fields.len()); - let mut len = 0; - for field in fields_vs.iter() { - len = field[index].len(); - columns.push(field[index].clone()); - } - + let fields: Vec = fields_vs + .iter_mut() + .map(|field| field.remove(index)) + .collect(); Column::Tuple { - fields: columns, - len, + len: fields.first().map(|f| f.len()).unwrap_or(0), + fields, } }) .collect() diff --git a/common/expression/src/kernels/take.rs b/common/expression/src/kernels/take.rs index bc4548b701063..24e79cebd5cfe 100644 --- a/common/expression/src/kernels/take.rs +++ b/common/expression/src/kernels/take.rs @@ -12,20 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_arrow::arrow::bitmap::MutableBitmap; -use common_arrow::arrow::buffer::Buffer; use common_arrow::arrow::types::Index; use common_exception::Result; use crate::types::array::ArrayColumnBuilder; use crate::types::nullable::NullableColumn; -use crate::types::string::StringColumnBuilder; use crate::types::AnyType; +use crate::types::ArgType; use crate::types::ArrayType; use crate::types::BooleanType; +use crate::types::NumberType; use crate::types::StringType; use crate::types::ValueType; -use crate::with_number_type; +use crate::with_number_mapped_type; use crate::Chunk; use crate::Column; use crate::Value; @@ -50,34 +49,22 @@ impl Chunk { impl Column { pub fn take(&self, indices: &[I]) -> Self { let length = indices.len(); - with_number_type!(SRC_TYPE, match self { - Column::SRC_TYPE(values) => { - Column::SRC_TYPE(Self::take_primitives(values, indices)) + with_number_mapped_type!(NUM_TYPE, match self { + Column::NUM_TYPE(values) => { + Self::take_arg_types::, _>(values, indices) } Column::Null { .. } | Column::EmptyArray { .. 
} => self.slice(0..length), - Column::Boolean(bm) => Self::take_scalars::( - bm, - MutableBitmap::with_capacity(length), - indices - ), - Column::String(column) => Self::take_scalars::( - column, - StringColumnBuilder::with_capacity(length, 0), - indices - ), + Column::Boolean(bm) => Self::take_arg_types::(bm, indices), + Column::String(column) => Self::take_arg_types::(column, indices), Column::Array(column) => { let mut builder = ArrayColumnBuilder::::from_column(column.slice(0..0)); builder.reserve(length); - Self::take_scalars::, _>(column, builder, indices) + Self::take_value_types::, _>(column, builder, indices) } Column::Nullable(c) => { let column = c.column.take(indices); - let validity = Self::take_scalars::( - &c.validity, - MutableBitmap::with_capacity(length), - indices, - ); + let validity = Self::take_arg_types::(&c.validity, indices); Column::Nullable(Box::new(NullableColumn { column, validity: BooleanType::try_downcast_column(&validity).unwrap(), @@ -93,7 +80,17 @@ impl Column { }) } - fn take_scalars( + fn take_arg_types(col: &T::Column, indices: &[I]) -> Column { + let col = T::column_from_ref_iter( + indices + .iter() + .map(|index| unsafe { T::index_column_unchecked(col, index.to_usize()) }), + &[], + ); + T::upcast_column(col) + } + + fn take_value_types( col: &T::Column, mut builder: T::ColumnBuilder, indices: &[I], @@ -108,18 +105,4 @@ impl Column { } T::upcast_column(T::build_column(builder)) } - - fn take_primitives(col: &Buffer, indices: &[I]) -> Buffer { - let mut vs: Vec = Vec::with_capacity(indices.len()); - let mut dst = vs.as_mut_ptr(); - for index in indices { - unsafe { - let e = col[index.to_usize()]; - dst.write(e); - dst = dst.add(1); - } - } - unsafe { vs.set_len(indices.len()) }; - vs.into() - } } diff --git a/common/expression/src/types.rs b/common/expression/src/types.rs index c11c898208b4f..d8f6595799e92 100755 --- a/common/expression/src/types.rs +++ b/common/expression/src/types.rs @@ -104,12 +104,7 @@ pub trait ValueType: Debug + Clone + PartialEq + Sized + 'static { ) -> Self::ScalarRef<'a>; fn slice_column<'a>(col: &'a Self::Column, range: Range) -> Self::Column; fn iter_column<'a>(col: &'a Self::Column) -> Self::ColumnIterator<'a>; - fn column_to_builder(col: Self::Column) -> Self::ColumnBuilder; - fn column_init_builder(col: &Self::Column, _capacity: usize) -> Self::ColumnBuilder { - let col = Self::slice_column(col, 0..0); - Self::column_to_builder(col) - } fn builder_len(builder: &Self::ColumnBuilder) -> usize; fn push_item(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>); @@ -123,6 +118,7 @@ pub trait ArgType: ValueType { fn data_type() -> DataType; fn full_domain(generics: &GenericMap) -> Self::Domain; fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder; + fn column_from_iter( iter: impl Iterator, generics: &GenericMap, @@ -133,6 +129,17 @@ pub trait ArgType: ValueType { } Self::build_column(col) } + + fn column_from_ref_iter<'a>( + iter: impl Iterator>, + generics: &GenericMap, + ) -> Self::Column { + let mut col = Self::create_builder(iter.size_hint().0, generics); + for item in iter { + Self::push_item(&mut col, item); + } + Self::build_column(col) + } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/common/expression/src/types/boolean.rs b/common/expression/src/types/boolean.rs index 8683fadd25fc6..325f8be590e25 100644 --- a/common/expression/src/types/boolean.rs +++ b/common/expression/src/types/boolean.rs @@ -104,10 +104,6 @@ impl ValueType for BooleanType { bitmap_into_mut(col) 
} - fn column_init_builder(_col: &Self::Column, capacity: usize) -> Self::ColumnBuilder { - MutableBitmap::with_capacity(capacity) - } - fn builder_len(builder: &Self::ColumnBuilder) -> usize { builder.len() } @@ -153,4 +149,11 @@ impl ArgType for BooleanType { fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { iter.collect() } + + fn column_from_ref_iter<'a>( + iter: impl Iterator>, + _: &GenericMap, + ) -> Self::Column { + iter.collect() + } } diff --git a/common/expression/src/types/empty_array.rs b/common/expression/src/types/empty_array.rs index 52547af9679ba..1b9943a3779ac 100644 --- a/common/expression/src/types/empty_array.rs +++ b/common/expression/src/types/empty_array.rs @@ -141,4 +141,11 @@ impl ArgType for EmptyArrayType { fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { iter.count() } + + fn column_from_ref_iter<'a>( + iter: impl Iterator>, + _: &GenericMap, + ) -> Self::Column { + iter.count() + } } diff --git a/common/expression/src/types/null.rs b/common/expression/src/types/null.rs index 68f10ee2e4308..9799284a8f194 100644 --- a/common/expression/src/types/null.rs +++ b/common/expression/src/types/null.rs @@ -148,4 +148,11 @@ impl ArgType for NullType { fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { iter.count() } + + fn column_from_ref_iter<'a>( + iter: impl Iterator>, + _: &GenericMap, + ) -> Self::Column { + iter.count() + } } diff --git a/common/expression/src/types/nullable.rs b/common/expression/src/types/nullable.rs index 7db510df1e7ca..9e41d333a89a0 100755 --- a/common/expression/src/types/nullable.rs +++ b/common/expression/src/types/nullable.rs @@ -184,10 +184,9 @@ impl NullableColumn { /// /// Calling this method with an out-of-bounds index is *[undefined behavior]* pub unsafe fn index_unchecked(&self, index: usize) -> Option> { - match self.validity.get(index) { - Some(true) => Some(T::index_column(&self.column, index).unwrap()), - Some(false) => None, - _ => None, + match self.validity.get_bit_unchecked(index) { + true => Some(T::index_column(&self.column, index).unwrap()), + false => None, } } diff --git a/common/expression/src/types/number.rs b/common/expression/src/types/number.rs index 4c08b848b5f94..5cb47801d361e 100644 --- a/common/expression/src/types/number.rs +++ b/common/expression/src/types/number.rs @@ -116,10 +116,6 @@ impl ValueType for NumberType { buffer_into_mut(col) } - fn column_init_builder(_col: &Self::Column, capacity: usize) -> Self::ColumnBuilder { - Vec::with_capacity(capacity) - } - fn builder_len(builder: &Self::ColumnBuilder) -> usize { builder.len() } @@ -162,6 +158,13 @@ impl ArgType for NumberType { fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { iter.collect() } + + fn column_from_ref_iter<'a>( + iter: impl Iterator>, + _: &GenericMap, + ) -> Self::Column { + iter.collect() + } } impl Number for u8 { diff --git a/common/expression/src/types/string.rs b/common/expression/src/types/string.rs index 5ec5c5cab0712..1f09adab19bb8 100644 --- a/common/expression/src/types/string.rs +++ b/common/expression/src/types/string.rs @@ -99,10 +99,6 @@ impl ValueType for StringType { StringColumnBuilder::from_column(col) } - fn column_init_builder(_col: &Self::Column, capacity: usize) -> Self::ColumnBuilder { - StringColumnBuilder::with_capacity(capacity, 0) - } - fn builder_len(builder: &Self::ColumnBuilder) -> usize { builder.len() } diff --git a/common/expression/tests/it/kernel.rs b/common/expression/tests/it/kernel.rs index 
6ab593d1e69a5..ce05fef827f28 100644 --- a/common/expression/tests/it/kernel.rs +++ b/common/expression/tests/it/kernel.rs @@ -153,9 +153,9 @@ fn run_filter(file: &mut impl Write, predicate: Column, columns: &[Column]) { match result { Ok(result_chunk) => { - writeln!(file, "Filter: {predicate:?}").unwrap(); - writeln!(file, "Source:\n{chunk:?}").unwrap(); - writeln!(file, "Result:\n{result_chunk:?}").unwrap(); + writeln!(file, "Filter: {predicate}").unwrap(); + writeln!(file, "Source:\n{chunk}").unwrap(); + writeln!(file, "Result:\n{result_chunk}").unwrap(); write!(file, "\n\n").unwrap(); } Err(err) => { @@ -182,7 +182,7 @@ fn run_concat(file: &mut impl Write, columns: Vec>) { writeln!(file, "Concat-Column {}:", i).unwrap(); writeln!(file, "{:?}", c).unwrap(); } - writeln!(file, "Result:\n{result_chunk:?}").unwrap(); + writeln!(file, "Result:\n{result_chunk}").unwrap(); write!(file, "\n\n").unwrap(); } Err(err) => { @@ -201,8 +201,8 @@ fn run_take(file: &mut impl Write, indices: &[u32], columns: &[Column]) { match result { Ok(result_chunk) => { writeln!(file, "Take: {indices:?}").unwrap(); - writeln!(file, "Source:\n{chunk:?}").unwrap(); - writeln!(file, "Result:\n{result_chunk:?}").unwrap(); + writeln!(file, "Source:\n{chunk}").unwrap(); + writeln!(file, "Result:\n{result_chunk}").unwrap(); write!(file, "\n\n").unwrap(); } Err(err) => { @@ -221,10 +221,10 @@ fn run_scatter(file: &mut impl Write, columns: &[Column], indices: &[u32], scatt match result { Ok(result_chunk) => { writeln!(file, "Scatter: {indices:?}").unwrap(); - writeln!(file, "Source:\n{chunk:?}").unwrap(); + writeln!(file, "Source:\n{chunk}").unwrap(); for (i, c) in result_chunk.iter().enumerate() { - writeln!(file, "Result-{i}:\n{c:?}").unwrap(); + writeln!(file, "Result-{i}:\n{c}").unwrap(); } write!(file, "\n\n").unwrap(); } diff --git a/common/expression/tests/it/testdata/kernel-pass.txt b/common/expression/tests/it/testdata/kernel-pass.txt index c03b43427a17f..4d35c162d8a5a 100644 --- a/common/expression/tests/it/testdata/kernel-pass.txt +++ b/common/expression/tests/it/testdata/kernel-pass.txt @@ -1,43 +1,40 @@ -Filter: Column(Boolean([0b___10001])) +Filter: Boolean([0b___10001]) Source: -+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([0, 1, 2, 3, -4])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | -| 2 | Column(Null { len: 5 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [97, 98, 99, 100, 101], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00011] })) | -+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 0 | NULL | NULL | "a" | +| 1 | 11 | NULL | "b" | +| 2 | NULL | NULL | NULL | +| 3 | NULL | NULL | NULL | +| -4 | NULL | NULL | NULL | ++----------+----------+----------+----------+ Result: -+-----------+-----------------------------------------------------------------------------------------------------------------------------------+ 
-| Column ID | Column Data | -+-----------+-----------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([0, -4])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 14]), validity: [0b______00] })) | -| 2 | Column(Null { len: 2 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [97, 101], offsets: [0, 1, 2] }), validity: [0b______01] })) | -+-----------+-----------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 0 | NULL | NULL | "a" | +| -4 | NULL | NULL | NULL | ++----------+----------+----------+----------+ -Filter: Column(Nullable(NullableColumn { column: Boolean([0b___11011]), validity: [0b___00110] })) +Filter: Nullable(NullableColumn { column: Boolean([0b___11011]), validity: [0b___00110] }) Source: -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([0, 1, 2, 3, -4])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | -| 2 | Column(Null { len: 5 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00110] })) | -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 0 | NULL | NULL | NULL | +| 1 | 11 | NULL | "y" | +| 2 | NULL | NULL | "z" | +| 3 | NULL | NULL | NULL | +| -4 | NULL | NULL | NULL | ++----------+----------+----------+----------+ Result: -+-----------+----------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+----------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([1])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([11]), validity: [0b_______1] })) | -| 2 | Column(Null { len: 1 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [121], offsets: [0, 1] }), validity: [0b_______1] })) | -+-----------+----------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 1 | 11 | NULL | "y" | ++----------+----------+----------+----------+ Concat-Column 0: @@ -61,74 +58,70 @@ Concat-Column 1: | 4 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121], offsets: [0, 1, 2] }), validity: [0b______10] })) | 
+-----------+------------------------------------------------------------------------------------------------------------------------------------+ Result: -+-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([0, 1, 2, 3, -4, 5, 6])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14, 15, 16]), validity: [0b_1000010] })) | -| 2 | Column(Null { len: 7 }) | -| 3 | Column(EmptyArray { len: 7 }) | -| 4 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98, 120, 121], offsets: [0, 1, 2, 3, 4, 5, 6, 7] }), validity: [0b_1000110] })) | -+-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | Column 4 | ++----------+----------+----------+----------+----------+ +| 0 | NULL | NULL | [] | NULL | +| 1 | 11 | NULL | [] | "y" | +| 2 | NULL | NULL | [] | "z" | +| 3 | NULL | NULL | [] | NULL | +| -4 | NULL | NULL | [] | NULL | +| 5 | NULL | NULL | [] | NULL | +| 6 | 16 | NULL | [] | "y" | ++----------+----------+----------+----------+----------+ Take: [0, 3, 1] Source: -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([0, 1, 2, 3, -4])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | -| 2 | Column(Null { len: 5 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00110] })) | -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 0 | NULL | NULL | NULL | +| 1 | 11 | NULL | "y" | +| 2 | NULL | NULL | "z" | +| 3 | NULL | NULL | NULL | +| -4 | NULL | NULL | NULL | ++----------+----------+----------+----------+ Result: -+-----------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+-------------------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([0, 3, 1])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 13, 11]), validity: [0b_____100] })) | -| 2 | Column(Null { len: 3 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 97, 121], offsets: [0, 1, 2, 3] }), validity: [0b_____100] })) | 
-+-----------+-------------------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 0 | NULL | NULL | NULL | +| 3 | NULL | NULL | NULL | +| 1 | 11 | NULL | "y" | ++----------+----------+----------+----------+ Scatter: [0, 0, 1, 2, 1] Source: -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([0, 1, 2, 3, -4])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11, 12, 13, 14]), validity: [0b___00010] })) | -| 2 | Column(Null { len: 5 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121, 122, 97, 98], offsets: [0, 1, 2, 3, 4, 5] }), validity: [0b___00110] })) | -+-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 0 | NULL | NULL | NULL | +| 1 | 11 | NULL | "y" | +| 2 | NULL | NULL | "z" | +| 3 | NULL | NULL | NULL | +| -4 | NULL | NULL | NULL | ++----------+----------+----------+----------+ Result-0: -+-----------+------------------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+------------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([0, 1])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([10, 11]), validity: [0b______10] })) | -| 2 | Column(Null { len: 2 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [120, 121], offsets: [0, 1, 2] }), validity: [0b______10] })) | -+-----------+------------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 0 | NULL | NULL | NULL | +| 1 | 11 | NULL | "y" | ++----------+----------+----------+----------+ Result-1: -+-----------+-----------------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+-----------------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([2, -4])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([12, 14]), validity: [0b______00] })) | -| 2 | Column(Null { len: 2 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [122, 98], offsets: [0, 1, 2] }), validity: [0b______01] })) | -+-----------+-----------------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 
| ++----------+----------+----------+----------+ +| 2 | NULL | NULL | "z" | +| -4 | NULL | NULL | NULL | ++----------+----------+----------+----------+ Result-2: -+-----------+---------------------------------------------------------------------------------------------------------------------------+ -| Column ID | Column Data | -+-----------+---------------------------------------------------------------------------------------------------------------------------+ -| 0 | Column(Int32([3])) | -| 1 | Column(Nullable(NullableColumn { column: UInt8([13]), validity: [0b_______0] })) | -| 2 | Column(Null { len: 1 }) | -| 3 | Column(Nullable(NullableColumn { column: String(StringColumn { data: [97], offsets: [0, 1] }), validity: [0b_______0] })) | -+-----------+---------------------------------------------------------------------------------------------------------------------------+ ++----------+----------+----------+----------+ +| Column 0 | Column 1 | Column 2 | Column 3 | ++----------+----------+----------+----------+ +| 3 | NULL | NULL | NULL | ++----------+----------+----------+----------+ From 598822780a36b93fcfe96df8802d3d9c346a05f1 Mon Sep 17 00:00:00 2001 From: elijah Date: Tue, 9 Aug 2022 22:00:46 +0800 Subject: [PATCH 31/59] chore: rename date time functions --- common/functions/src/scalars/dates/date.rs | 77 ++++++++++--------- .../30-datetime-functions/addinterval.md | 38 ++++----- .../30-datetime-functions/subtractinterval.md | 38 ++++----- .../30-datetime-functions/timeslot.md | 12 +-- .../30-datetime-functions/toMonth.md | 12 +-- .../30-datetime-functions/todayofmonth.md | 12 +-- .../30-datetime-functions/todayofweek.md | 12 +-- .../30-datetime-functions/todayofyear.md | 12 +-- .../30-datetime-functions/tohour.md | 12 +-- .../30-datetime-functions/tominute.md | 12 +-- .../30-datetime-functions/tomonday.md | 16 ++-- .../30-datetime-functions/tosecond.md | 12 +-- .../30-datetime-functions/tostartofday.md | 12 +-- .../tostartoffifteenminutes.md | 12 +-- .../tostartoffiveminutes.md | 12 +-- .../30-datetime-functions/tostartofhour.md | 12 +-- .../30-datetime-functions/tostartofisoyear.md | 12 +-- .../30-datetime-functions/tostartofminute.md | 12 +-- .../30-datetime-functions/tostartofmonth.md | 12 +-- .../30-datetime-functions/tostartofquarter.md | 12 +-- .../30-datetime-functions/tostartofsecond.md | 8 +- .../tostartoftenminutes.md | 12 +-- .../30-datetime-functions/tostartofweek.md | 12 +-- .../30-datetime-functions/tostartofyear.md | 12 +-- .../30-datetime-functions/toyear.md | 12 +-- .../30-datetime-functions/toyyyymm.md | 8 +- .../30-datetime-functions/toyyyymmdd.md | 12 +-- .../30-datetime-functions/toyyyymmddhhmmss.md | 12 +-- 28 files changed, 226 insertions(+), 223 deletions(-) diff --git a/common/functions/src/scalars/dates/date.rs b/common/functions/src/scalars/dates/date.rs index b68dabd44665c..2ff05c1d2c62b 100644 --- a/common/functions/src/scalars/dates/date.rs +++ b/common/functions/src/scalars/dates/date.rs @@ -70,64 +70,67 @@ impl DateFunction { factory.register("yesterday", YesterdayFunction::desc()); factory.register("tomorrow", TomorrowFunction::desc()); factory.register("now", NowFunction::desc()); - factory.register("toYYYYMM", ToYYYYMMFunction::desc()); - factory.register("toYYYYMMDD", ToYYYYMMDDFunction::desc()); - factory.register("toYYYYMMDDhhmmss", ToYYYYMMDDhhmmssFunction::desc()); - factory.register("toStartOfYear", ToStartOfYearFunction::desc()); - factory.register("toStartOfISOYear", ToStartOfISOYearFunction::desc()); - 
factory.register("toStartOfQuarter", ToStartOfQuarterFunction::desc()); + factory.register("to_yyyymm", ToYYYYMMFunction::desc()); + factory.register("to_yyyymmdd", ToYYYYMMDDFunction::desc()); + factory.register("to_yyyymmddhhmmss", ToYYYYMMDDhhmmssFunction::desc()); + factory.register("to_start_of_year", ToStartOfYearFunction::desc()); + factory.register("to_start_of_iso_year", ToStartOfISOYearFunction::desc()); + factory.register("to_start_of_quarter", ToStartOfQuarterFunction::desc()); - factory.register("toStartOfMonth", ToStartOfMonthFunction::desc()); - factory.register("toMonth", ToMonthFunction::desc()); - factory.register("toDayOfYear", ToDayOfYearFunction::desc()); - factory.register("toDayOfMonth", ToDayOfMonthFunction::desc()); - factory.register("toDayOfWeek", ToDayOfWeekFunction::desc()); - factory.register("toHour", ToHourFunction::desc()); - factory.register("toMinute", ToMinuteFunction::desc()); - factory.register("toSecond", ToSecondFunction::desc()); - factory.register("toMonday", ToMondayFunction::desc()); - factory.register("toYear", ToYearFunction::desc()); + factory.register("to_start_of_month", ToStartOfMonthFunction::desc()); + factory.register("to_month", ToMonthFunction::desc()); + factory.register("to_day_of_year", ToDayOfYearFunction::desc()); + factory.register("to_day_of_month", ToDayOfMonthFunction::desc()); + factory.register("to_day_of_week", ToDayOfWeekFunction::desc()); + factory.register("to_hour", ToHourFunction::desc()); + factory.register("to_minute", ToMinuteFunction::desc()); + factory.register("to_second", ToSecondFunction::desc()); + factory.register("to_monday", ToMondayFunction::desc()); + factory.register("to_year", ToYearFunction::desc()); // rounders factory.register( - "toStartOfSecond", + "to_start_of_second", Self::round_function_creator(Round::Second), ); factory.register( - "toStartOfMinute", + "to_start_of_minute", Self::round_function_creator(Round::Minute), ); factory.register( - "toStartOfFiveMinutes", + "to_start_of_five_minutes", Self::round_function_creator(Round::FiveMinutes), ); factory.register( - "toStartOfTenMinutes", + "to_start_of_ten_minutes", Self::round_function_creator(Round::TenMinutes), ); factory.register( - "toStartOfFifteenMinutes", + "to_start_of_fifteen_minutes", Self::round_function_creator(Round::FifteenMinutes), ); - factory.register("timeSlot", Self::round_function_creator(Round::TimeSlot)); - factory.register("toStartOfHour", Self::round_function_creator(Round::Hour)); - factory.register("toStartOfDay", Self::round_function_creator(Round::Day)); + factory.register("time_slot", Self::round_function_creator(Round::TimeSlot)); + factory.register( + "to_start_of_hour", + Self::round_function_creator(Round::Hour), + ); + factory.register("to_start_of_day", Self::round_function_creator(Round::Day)); - factory.register("toStartOfWeek", ToStartOfWeekFunction::desc()); + factory.register("to_start_of_week", ToStartOfWeekFunction::desc()); // interval functions - factory.register("addYears", AddYearsFunction::desc(1)); - factory.register("addMonths", AddMonthsFunction::desc(1)); - factory.register("addDays", AddDaysFunction::desc(1)); - factory.register("addHours", AddTimesFunction::desc(3600)); - factory.register("addMinutes", AddTimesFunction::desc(60)); - factory.register("addSeconds", AddTimesFunction::desc(1)); - factory.register("subtractYears", AddYearsFunction::desc(-1)); - factory.register("subtractMonths", AddMonthsFunction::desc(-1)); - factory.register("subtractDays", AddDaysFunction::desc(-1)); - 
factory.register("subtractHours", AddTimesFunction::desc(-3600)); - factory.register("subtractMinutes", AddTimesFunction::desc(-60)); - factory.register("subtractSeconds", AddTimesFunction::desc(-1)); + factory.register("add_years", AddYearsFunction::desc(1)); + factory.register("add_months", AddMonthsFunction::desc(1)); + factory.register("add_days", AddDaysFunction::desc(1)); + factory.register("add_hours", AddTimesFunction::desc(3600)); + factory.register("add_minutes", AddTimesFunction::desc(60)); + factory.register("add_seconds", AddTimesFunction::desc(1)); + factory.register("subtract_years", AddYearsFunction::desc(-1)); + factory.register("subtract_months", AddMonthsFunction::desc(-1)); + factory.register("subtract_days", AddDaysFunction::desc(-1)); + factory.register("subtract_hours", AddTimesFunction::desc(-3600)); + factory.register("subtract_minutes", AddTimesFunction::desc(-60)); + factory.register("subtract_seconds", AddTimesFunction::desc(-1)); factory.register( "to_interval_year", diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/addinterval.md b/docs/doc/30-reference/20-functions/30-datetime-functions/addinterval.md index fd4b14730f118..75ebaa919b4a9 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/addinterval.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/addinterval.md @@ -1,19 +1,19 @@ --- title: Add Time Interval description: Add time interval function -title_includes: addYears, addMonths, addDays, addHours, addMinutes, addSeconds +title_includes: add_years, add_months, add_days, add_hours, add_minutes, add_seconds --- Add time interval to a date or datetime, return the result of date or datetime type. ## Syntax ```sql -addYears(exp0, expr1) -addMonths(exp0, expr1) -addDays(exp0, expr1) -addHours(exp0, expr1) -addMinutes(exp0, expr1) -addSeconds(exp0, expr1) +add_years(exp0, expr1) +add_months(exp0, expr1) +add_days(exp0, expr1) +add_hours(exp0, expr1) +add_minutes(exp0, expr1) +add_seconds(exp0, expr1) ``` ## Return Type @@ -23,44 +23,44 @@ Date, Timestamp, depends on the input. 
## Examples ```sql -SELECT to_date(18875), addYears(to_date(18875), 2); +SELECT to_date(18875), add_years(to_date(18875), 2); +---------------+-----------------------------+ -| to_date(18875) | addYears(to_date(18875), 10) | +| to_date(18875) | add_years(to_date(18875), 10) | +---------------+-----------------------------+ | 2021-09-05 | 2023-09-05 | +---------------+-----------------------------+ -SELECT to_date(18875), addMonths(to_date(18875), 2); +SELECT to_date(18875), add_months(to_date(18875), 2); +---------------+-----------------------------+ -| to_date(18875) | addMonths(to_date(18875), 2) | +| to_date(18875) | add_months(to_date(18875), 2) | +---------------+-----------------------------+ | 2021-09-05 | 2021-11-05 | +---------------+-----------------------------+ -SELECT to_date(18875), addDays(to_date(18875), 2); +SELECT to_date(18875), add_days(to_date(18875), 2); +---------------+---------------------------+ -| to_date(18875) | addDays(to_date(18875), 2) | +| to_date(18875) | add_days(to_date(18875), 2) | +---------------+---------------------------+ | 2021-09-05 | 2021-09-07 | +---------------+---------------------------+ -SELECT to_datetime(1630833797), addHours(to_datetime(1630833797), 2); +SELECT to_datetime(1630833797), add_hours(to_datetime(1630833797), 2); +------------------------+-------------------------------------+ -| to_datetime(1630833797) | addHours(to_datetime(1630833797), 2) | +| to_datetime(1630833797) | add_hours(to_datetime(1630833797), 2) | +------------------------+-------------------------------------+ | 2021-09-05 09:23:17 | 2021-09-05 11:23:17 | +------------------------+-------------------------------------+ -SELECT to_datetime(1630833797), addMinutes(to_datetime(1630833797), 2); +SELECT to_datetime(1630833797), add_minutes(to_datetime(1630833797), 2); +------------------------+---------------------------------------+ -| to_datetime(1630833797) | addMinutes(to_datetime(1630833797), 2) | +| to_datetime(1630833797) | add_minutes(to_datetime(1630833797), 2) | +------------------------+---------------------------------------+ | 2021-09-05 09:23:17 | 2021-09-05 09:25:17 | +------------------------+---------------------------------------+ -SELECT to_datetime(1630833797), addSeconds(to_datetime(1630833797), 2); +SELECT to_datetime(1630833797), add_seconds(to_datetime(1630833797), 2); +------------------------+---------------------------------------+ -| to_datetime(1630833797) | addSeconds(to_datetime(1630833797), 2) | +| to_datetime(1630833797) | add_seconds(to_datetime(1630833797), 2) | +------------------------+---------------------------------------+ | 2021-09-05 09:23:17 | 2021-09-05 09:23:19 | +------------------------+---------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/subtractinterval.md b/docs/doc/30-reference/20-functions/30-datetime-functions/subtractinterval.md index 5f31fc35d1a9f..0f381194e4f65 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/subtractinterval.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/subtractinterval.md @@ -1,19 +1,19 @@ --- title: Subtract Time Interval description: Subtract time interval function -title_includes: subtractYears, subtractMonths, subtractDays, subtractHours, subtractMinutes, subtractSeconds +title_includes: subtract_years, subtract_months, subtract_days, subtract_hours, subtract_minutes, subtract_seconds --- Subtract time interval from a date or datetime, return the result of date or datetime type. 
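As a quick sanity check that the renamed add_* and subtract_* functions remain inverses of one another, a round-trip query along the lines of the sketch below should hand back the original date. The expected value is an assumption based on the examples above, where `to_date(18875)` resolves to 2021-09-05, and presumes the rename is purely cosmetic.

```sql
-- adding and then subtracting the same interval should be a no-op
SELECT subtract_days(add_days(to_date(18875), 7), 7);
-- expected: 2021-09-05 (assuming the renamed functions keep the original semantics)
```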
## Syntax ```sql -subtractYears(exp0, expr1) -subtractMonths(exp0, expr1) -subtractDays(exp0, expr1) -subtractHours(exp0, expr1) -subtractMinutes(exp0, expr1) -subtractSeconds(exp0, expr1) +subtract_years(exp0, expr1) +subtract_months(exp0, expr1) +subtract_days(exp0, expr1) +subtract_hours(exp0, expr1) +subtract_minutes(exp0, expr1) +subtract_seconds(exp0, expr1) ``` ## Return Type @@ -23,44 +23,44 @@ Date, Timestamp depends on the input. ## Examples ```sql -SELECT to_date(18875), subtractYears(to_date(18875), 2); +SELECT to_date(18875), subtract_years(to_date(18875), 2); +---------------+---------------------------------+ -| to_date(18875) | subtractYears(to_date(18875), 2) | +| to_date(18875) | subtract_years(to_date(18875), 2) | +---------------+---------------------------------+ | 2021-09-05 | 2019-09-05 | +---------------+---------------------------------+ -SELECT to_date(18875), subtractMonths(to_date(18875), 2); +SELECT to_date(18875), subtract_months(to_date(18875), 2); +---------------+----------------------------------+ -| to_date(18875) | subtractMonths(to_date(18875), 2) | +| to_date(18875) | subtract_months(to_date(18875), 2) | +---------------+----------------------------------+ | 2021-09-05 | 2021-07-05 | +---------------+----------------------------------+ -SELECT to_date(18875), subtractDays(to_date(18875), 2); +SELECT to_date(18875), subtract_days(to_date(18875), 2); +---------------+--------------------------------+ -| to_date(18875) | subtractDays(to_date(18875), 2) | +| to_date(18875) | subtract_days(to_date(18875), 2) | +---------------+--------------------------------+ | 2021-09-05 | 2021-09-03 | +---------------+--------------------------------+ -SELECT to_datetime(1630833797), subtractHours(to_datetime(1630833797), 2); +SELECT to_datetime(1630833797), subtract_hours(to_datetime(1630833797), 2); +------------------------+------------------------------------------+ -| to_datetime(1630833797) | subtractHours(to_datetime(1630833797), 2) | +| to_datetime(1630833797) | subtract_hours(to_datetime(1630833797), 2) | +------------------------+------------------------------------------+ | 2021-09-05 09:23:17 | 2021-09-05 07:23:17 | +------------------------+------------------------------------------+ -SELECT to_datetime(1630833797), subtractMinutes(to_datetime(1630833797), 2); +SELECT to_datetime(1630833797), subtract_minutes(to_datetime(1630833797), 2); +------------------------+--------------------------------------------+ -| to_datetime(1630833797) | subtractMinutes(to_datetime(1630833797), 2) | +| to_datetime(1630833797) | subtract_minutes(to_datetime(1630833797), 2) | +------------------------+--------------------------------------------+ | 2021-09-05 09:23:17 | 2021-09-05 09:21:17 | +------------------------+--------------------------------------------+ -SELECT to_datetime(1630833797), subtractSeconds(to_datetime(1630833797), 2); +SELECT to_datetime(1630833797), subtract_seconds(to_datetime(1630833797), 2); +------------------------+--------------------------------------------+ -| to_datetime(1630833797) | subtractSeconds(to_datetime(1630833797), 2) | +| to_datetime(1630833797) | subtract_seconds(to_datetime(1630833797), 2) | +------------------------+--------------------------------------------+ | 2021-09-05 09:23:17 | 2021-09-05 09:23:15 | +------------------------+--------------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/timeslot.md b/docs/doc/30-reference/20-functions/30-datetime-functions/timeslot.md index 
581b34bb35c68..3ce875b29a59a 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/timeslot.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/timeslot.md @@ -1,12 +1,12 @@ --- -title: TIMESLOT +title: time_slot --- Rounds the time to the half hour. ## Syntax ```sql -TIMESLOT( ) +time_slot( ) ``` ## Arguments @@ -21,16 +21,16 @@ Datetime object, returns date in “YYYY-MM-DD hh:mm:ss” format. ## Examples ```sql -SELECT timeslot(now()); +SELECT time_slot(now()); +---------------------+ -| timeslot(now()) | +| time_slot(now()) | +---------------------+ | 2022-03-29 06:30:00 | +---------------------+ -SELECT timeslot(to_datetime(1630812366)); +SELECT time_slot(to_datetime(1630812366)); +----------------------------------+ -| timeslot(to_datetime(1630812366)) | +| time_slot(to_datetime(1630812366)) | +----------------------------------+ | 2021-09-05 03:00:00 | +----------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/toMonth.md b/docs/doc/30-reference/20-functions/30-datetime-functions/toMonth.md index ff78c6134ef67..012a09a7dec82 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/toMonth.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/toMonth.md @@ -1,5 +1,5 @@ --- -title: toMonth +title: to_month --- Converts a date or date with time to a UInt8 number containing the month number (1-12). @@ -7,7 +7,7 @@ Converts a date or date with time to a UInt8 number containing the month number ## Syntax ```sql -toMonth( ) +to_month( ) ``` ## Arguments @@ -23,16 +23,16 @@ toMonth( ) ## Examples ```sql -SELECT toMonth(to_date(18869)); +SELECT to_month(to_date(18869)); +------------------------+ -| toMonth(to_date(18869)) | +| to_month(to_date(18869)) | +------------------------+ | 8 | +------------------------+ - SELECT toMonth(to_datetime(1630812366)); + SELECT to_month(to_datetime(1630812366)); +---------------------------------+ -| toMonth(to_datetime(1630812366)) | +| to_month(to_datetime(1630812366)) | +---------------------------------+ | 9 | +---------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/todayofmonth.md b/docs/doc/30-reference/20-functions/30-datetime-functions/todayofmonth.md index 31cf7c9215ac2..4b039f98f400b 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/todayofmonth.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/todayofmonth.md @@ -1,5 +1,5 @@ --- -title: TODAYOFMONTH +title: to_day_of_month --- Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). 
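The renamed part extractors compose naturally in a single query. The sketch below is only illustrative; the expected values are copied from the per-function examples elsewhere in this patch, where `to_date(18869)` is 2021-08-30, and assume the rename does not change behaviour.

```sql
SELECT to_month(to_date(18869)),
       to_day_of_month(to_date(18869)),
       to_day_of_week(to_date(18869)),
       to_day_of_year(to_date(18869));
-- expected: 8, 30, 1, 242
```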
@@ -7,7 +7,7 @@ Converts a date or date with time to a UInt8 number containing the number of the ## Syntax ```sql -toDayOfMonth( ) +to_day_of_month( ) ``` ## Arguments @@ -22,16 +22,16 @@ toDayOfMonth( ) ## Examples ```sql -SELECT toDayOfMonth(to_date(18869)); +SELECT to_day_of_month(to_date(18869)); +-----------------------------+ -| toDayOfMonth(to_date(18869)) | +| to_day_of_month(to_date(18869)) | +-----------------------------+ | 30 | +-----------------------------+ -SELECT toDayOfMonth(to_datetime(1630812366)); +SELECT to_day_of_month(to_datetime(1630812366)); +--------------------------------------+ -| toDayOfMonth(to_datetime(1630812366)) | +| to_day_of_month(to_datetime(1630812366)) | +--------------------------------------+ | 5 | +--------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/todayofweek.md b/docs/doc/30-reference/20-functions/30-datetime-functions/todayofweek.md index 9528536aa34d7..73a0092286486 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/todayofweek.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/todayofweek.md @@ -1,5 +1,5 @@ --- -title: toDayOfWeek +title: to_day_of_week --- Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). @@ -7,7 +7,7 @@ Converts a date or date with time to a UInt8 number containing the number of the ## Syntax ```sql -toDayOfWeek( ) +to_day_of_week( ) ``` ## Arguments @@ -22,16 +22,16 @@ toDayOfWeek( ) ## Examples ```sql -SELECT toDayOfWeek(to_date(18869)); +SELECT to_day_of_week(to_date(18869)); +----------------------------+ -| toDayOfWeek(to_date(18869)) | +| to_day_of_week(to_date(18869)) | +----------------------------+ | 1 | +----------------------------+ -SELECT toDayOfWeek(now()); +SELECT to_day_of_week(now()); +--------------------+ -| toDayOfWeek(now()) | +| to_day_of_week(now()) | +--------------------+ | 2 | +--------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/todayofyear.md b/docs/doc/30-reference/20-functions/30-datetime-functions/todayofyear.md index 0fd84a2c91075..aa182968e5e1f 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/todayofyear.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/todayofyear.md @@ -1,5 +1,5 @@ --- -title: toDayOfYear +title: to_day_of_year --- Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). @@ -7,7 +7,7 @@ Converts a date or date with time to a UInt16 number containing the number of th ## Syntax ```sql -toDayOfYear( ) +to_day_of_year( ) ``` ## Arguments @@ -23,16 +23,16 @@ A `UInt16` number datatype. 
## Examples ```sql -SELECT toDayOfYear(to_date(18869)); +SELECT to_day_of_year(to_date(18869)); +----------------------------+ -| toDayOfYear(to_date(18869)) | +| to_day_of_year(to_date(18869)) | +----------------------------+ | 242 | +----------------------------+ -SELECT toDayOfYear(now()); +SELECT to_day_of_year(now()); +--------------------+ -| toDayOfYear(now()) | +| to_day_of_year(now()) | +--------------------+ | 88 | +--------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tohour.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tohour.md index 524eb61c7a826..ff9fbb6694adb 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tohour.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tohour.md @@ -1,5 +1,5 @@ --- -title: toHour +title: to_hour --- Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23). @@ -8,7 +8,7 @@ This function assumes that if clocks are moved ahead, it is by one hour and occu ## Syntax ```sql -toHour( ) +to_hour( ) ``` ## Arguments @@ -24,16 +24,16 @@ toHour( ) ## Examples ```sql -SELECT toHour(now()); +SELECT to_hour(now()); +---------------+ -| toHour(now()) | +| to_hour(now()) | +---------------+ | 6 | +---------------+ -SELECT toHour(to_datetime(1630812366)); +SELECT to_hour(to_datetime(1630812366)); +--------------------------------+ -| toHour(to_datetime(1630812366)) | +| to_hour(to_datetime(1630812366)) | +--------------------------------+ | 3 | +--------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tominute.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tominute.md index 5f00babf0c79e..8801c863f543c 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tominute.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tominute.md @@ -1,5 +1,5 @@ --- -title: toMinute +title: to_minute --- Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59). @@ -7,7 +7,7 @@ Converts a date with time to a UInt8 number containing the number of the minute ## Syntax ```sql -toMinute( ) +to_minute( ) ``` ## Arguments @@ -23,16 +23,16 @@ toMinute( ) ## Examples ```sql -SELECT toMinute(now()); +SELECT to_minute(now()); +-----------------+ -| toMinute(now()) | +| to_minute(now()) | +-----------------+ | 17 | +-----------------+ -SELECT toMinute(to_datetime(1630812366)); +SELECT to_minute(to_datetime(1630812366)); +----------------------------------+ -| toMinute(to_datetime(1630812366)) | +| to_minute(to_datetime(1630812366)) | +----------------------------------+ | 26 | +----------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tomonday.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tomonday.md index b5bd3b25465ee..fa61254b5e145 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tomonday.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tomonday.md @@ -1,5 +1,5 @@ --- -title: toMonday +title: to_monday --- Rounds down a date or date with time to the nearest Monday. @@ -8,7 +8,7 @@ Returns the date. 
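The time-of-day extractors follow the same pattern. A combined sketch, with expected values copied from the per-function examples in this patch (`to_datetime(1630812366)` is 2021-09-05 03:26:06) and assuming behaviour-preserving renames:

```sql
SELECT to_hour(to_datetime(1630812366)),
       to_minute(to_datetime(1630812366)),
       to_second(to_datetime(1630812366));
-- expected: 3, 26, 6
```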
## Syntax ```sql -toMonday( ) +to_monday( ) ``` ## Arguments @@ -24,23 +24,23 @@ toMonday( ) ## Examples ```sql -SELECT toMonday(now()); +SELECT to_monday(now()); +-----------------+ -| tomonday(now()) | +| to_monday(now()) | +-----------------+ | 19079 | +-----------------+ -SELECT to_date(toMonday(now())); +SELECT to_date(to_monday(now())); +-------------------------+ -| to_date(toMonday(now())) | +| to_date(to_monday(now())) | +-------------------------+ | 2022-03-28 | +-------------------------+ -SELECT toMonday(to_datetime(1630812366)); +SELECT to_monday(to_datetime(1630812366)); +----------------------------------+ -| toMonday(to_datetime(1630812366)) | +| to_monday(to_datetime(1630812366)) | +----------------------------------+ | 18869 | +----------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tosecond.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tosecond.md index de6f55f0fd1c2..b3f3dcc4b5cee 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tosecond.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tosecond.md @@ -1,5 +1,5 @@ --- -title: toSecond +title: to_second --- Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). @@ -7,7 +7,7 @@ Converts a date with time to a UInt8 number containing the number of the second ## Syntax ```sql -toSecond( ) +to_second( ) ``` ## Arguments @@ -23,16 +23,16 @@ toSecond( ) ## Examples ```sql -SELECT toSecond(now()); +SELECT to_second(now()); +-----------------+ -| toSecond(now()) | +| to_second(now()) | +-----------------+ | 14 | +-----------------+ -SELECT toSecond(to_datetime(1630812366)); +SELECT to_second(to_datetime(1630812366)); +----------------------------------+ -| toSecond(to_datetime(1630812366)) | +| to_second(to_datetime(1630812366)) | +----------------------------------+ | 6 | +----------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofday.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofday.md index 963373f4d546a..7c38fa82266f0 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofday.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofday.md @@ -1,12 +1,12 @@ --- -title: toStartOfDay +title: to_start_of_day --- Rounds down a date with time to the start of the day. ## Syntax ```sql -toStartOfDay( ) +to_start_of_day( ) ``` ## Arguments @@ -22,16 +22,16 @@ Datetime object, returns date in “YYYY-MM-DD hh:mm:ss” format. 
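to_start_of_day truncates further than to_start_of_hour, and comparing the two on the same timestamp makes the difference visible. This is only a sketch; the expected values are taken from the examples in this patch and assume the rename keeps the original semantics.

```sql
SELECT to_start_of_day(to_datetime(1630812366)),
       to_start_of_hour(to_datetime(1630812366));
-- expected: 2021-09-05 00:00:00 and 2021-09-05 03:00:00
```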
## Examples ```sql -SELECT toStartOfDay(now()); +SELECT to_start_of_day(now()); +---------------------+ -| toStartOfDay(now()) | +| to_start_of_day(now()) | +---------------------+ | 2022-03-29 00:00:00 | +---------------------+ -SELECT toStartOfDay(to_datetime(1630812366)); +SELECT to_start_of_day(to_datetime(1630812366)); +--------------------------------------+ -| toStartOfDay(to_datetime(1630812366)) | +| to_start_of_day(to_datetime(1630812366)) | +--------------------------------------+ | 2021-09-05 00:00:00 | +--------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoffifteenminutes.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoffifteenminutes.md index 5bc3cebd856fe..5edc84649637e 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoffifteenminutes.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoffifteenminutes.md @@ -1,12 +1,12 @@ --- -title: toStartOfFifteenMinutes +title: to_start_of_fifteen_minutes --- Rounds down the date with time to the start of the fifteen-minute interval. ## Syntax ```sql -toStartOfFifteenMinutes( ) +to_start_of_fifteen_minutes( ) ``` ## Arguments @@ -22,16 +22,16 @@ Datetime object, returns date in “YYYY-MM-DD hh:mm:ss” format. ## Examples ```sql -SELECT toStartOfFifteenMinutes(now()); +SELECT to_start_of_fifteen_minutes(now()); +--------------------------------+ -| toStartOfFifteenMinutes(now()) | +| to_start_of_fifteen_minutes(now()) | +--------------------------------+ | 2022-03-29 06:45:00 | +--------------------------------+ -SELECT toStartOfFifteenMinutes(to_datetime(1630812366)); +SELECT to_start_of_fifteen_minutes(to_datetime(1630812366)); +-------------------------------------------------+ -| toStartOfFifteenMinutes(to_datetime(1630812366)) | +| to_start_of_fifteen_minutes(to_datetime(1630812366)) | +-------------------------------------------------+ | 2021-09-05 03:15:00 | +-------------------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoffiveminutes.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoffiveminutes.md index af3eabedb5dab..6bc3b36fa5cda 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoffiveminutes.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoffiveminutes.md @@ -1,12 +1,12 @@ --- -title: toStartOfFiveMinutes +title: to_start_of_five_minutes --- Rounds down a date with time to the start of the five-minute interval. ## Syntax ```sql -toStartOfFiveMinutes( ) +to_start_of_five_minutes( ) ``` ## Arguments @@ -22,16 +22,16 @@ Datetime object, returns date in “YYYY-MM-DD hh:mm:ss” format. 
## Examples ```sql -SELECT toStartOfFiveMinutes(now()); +SELECT to_start_of_five_minutes(now()); +-----------------------------+ -| toStartOfFiveMinutes(now()) | +| to_start_of_five_minutes(now()) | +-----------------------------+ | 2022-03-29 06:45:00 | +-----------------------------+ -SELECT toStartOfFiveMinutes(to_datetime(1630812366)); +SELECT to_start_of_five_minutes(to_datetime(1630812366)); +----------------------------------------------+ -| toStartOfFiveMinutes(to_datetime(1630812366)) | +| to_start_of_five_minutes(to_datetime(1630812366)) | +----------------------------------------------+ | 2021-09-05 03:25:00 | +----------------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofhour.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofhour.md index d1163464a6c3e..5bf353c6905d5 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofhour.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofhour.md @@ -1,12 +1,12 @@ --- -title: toStartOfHour +title: to_start_of_hour --- Rounds down a date with time to the start of the hour. ## Syntax ```sql -toStartOfHour( ) +to_start_of_hour( ) ``` ## Arguments @@ -22,16 +22,16 @@ Datetime object, returns date in “YYYY-MM-DD hh:mm:ss” format. ## Examples ```sql -SELECT toStartOfHour(now()); +SELECT to_start_of_hour(now()); +----------------------+ -| toStartOfHour(now()) | +| to_start_of_hour(now()) | +----------------------+ | 2022-03-29 06:00:00 | +----------------------+ -SELECT toStartOfHour(to_datetime(1630812366)); +SELECT to_start_of_hour(to_datetime(1630812366)); +---------------------------------------+ -| toStartOfHour(to_datetime(1630812366)) | +| to_start_of_hour(to_datetime(1630812366)) | +---------------------------------------+ | 2021-09-05 03:00:00 | +---------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofisoyear.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofisoyear.md index 98d90fce7ba57..90fea8593ea97 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofisoyear.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofisoyear.md @@ -1,5 +1,5 @@ --- -title: toStartOfISOYear +title: to_start_of_iso_year --- Returns the first day of the ISO year for a date or a date with time. @@ -7,7 +7,7 @@ Returns the first day of the ISO year for a date or a date with time. ## Syntax ```sql -toStartOfISOYear( ) +to_start_of_iso_year( ) ``` ## Arguments @@ -23,16 +23,16 @@ Datetime object, returns date in “YYYY-MM-DD” format. 
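The calendar year and the ISO year can begin on different days, which is the reason both functions exist. A comparison sketch, with expected values copied from the examples in this patch:

```sql
SELECT to_start_of_year(to_date(18869)),
       to_start_of_iso_year(to_date(18869));
-- expected: 2021-01-01 and 2021-01-04
```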
## Examples ```sql -SELECT toStartOfISOYear(to_date(18869)); +SELECT to_start_of_iso_year(to_date(18869)); +---------------------------------+ -| toStartOfISOYear(to_date(18869)) | +| to_start_of_iso_year(to_date(18869)) | +---------------------------------+ | 2021-01-04 | +---------------------------------+ -SELECT toStartOfISOYear(to_datetime(1630812366)); +SELECT to_start_of_iso_year(to_datetime(1630812366)); +------------------------------------------+ -| toStartOfISOYear(to_datetime(1630812366)) | +| to_start_of_iso_year(to_datetime(1630812366)) | +------------------------------------------+ | 2021-01-04 | +------------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofminute.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofminute.md index 65dab0f3f8877..838cbe2ac1e8d 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofminute.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofminute.md @@ -1,5 +1,5 @@ --- -title: toStartOfMinute +title: to_start_of_minute --- Rounds down a date with time to the start of the minute. @@ -7,7 +7,7 @@ Rounds down a date with time to the start of the minute. ## Syntax ```sql -toStartOfMinute( ) +to_start_of_minute( ) ``` ## Arguments @@ -23,16 +23,16 @@ Datetime object, returns date in “YYYY-MM-DD hh:mm:ss” format. ## Examples ```sql -SELECT toStartOfMinute(now()); +SELECT to_start_of_minute(now()); +------------------------+ -| toStartOfMinute(now()) | +| to_start_of_minute(now()) | +------------------------+ | 2022-03-29 06:43:00 | +------------------------+ -SELECT toStartOfMinute(to_datetime(1630812366)); +SELECT to_start_of_minute(to_datetime(1630812366)); +-----------------------------------------+ -| toStartOfMinute(to_datetime(1630812366)) | +| to_start_of_minute(to_datetime(1630812366)) | +-----------------------------------------+ | 2021-09-05 03:26:00 | +-----------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofmonth.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofmonth.md index 7365b8157c991..6e44e8484431f 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofmonth.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofmonth.md @@ -1,5 +1,5 @@ --- -title: toStartOfMonth +title: to_start_of_month --- Rounds down a date or date with time to the first day of the month. @@ -8,7 +8,7 @@ Returns the date. ## Syntax ```sql -toStartOfMonth( ) +to_start_of_month( ) ``` ## Arguments @@ -23,16 +23,16 @@ Datetime object, returns date in “YYYY-MM-DD” format. 
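Month and quarter rounding differ only in granularity; placing them side by side shows it. The values below are copied from the examples in this patch (`to_date(18869)` is 2021-08-30) and assume the renamed functions behave as before.

```sql
SELECT to_start_of_month(to_date(18869)),
       to_start_of_quarter(to_date(18869));
-- expected: 2021-08-01 and 2021-07-01
```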
## Examples ```sql -SELECT toStartOfMonth(to_date(18869)); +SELECT to_start_of_month(to_date(18869)); +-------------------------------+ -| toStartOfMonth(to_date(18869)) | +| to_start_of_month(to_date(18869)) | +-------------------------------+ | 2021-08-01 | +-------------------------------+ -SELECT toStartOfMonth(to_datetime(1630812366)); +SELECT to_start_of_month(to_datetime(1630812366)); +----------------------------------------+ -| toStartOfMonth(to_datetime(1630812366)) | +| to_start_of_month(to_datetime(1630812366)) | +----------------------------------------+ | 2021-09-01 | +----------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofquarter.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofquarter.md index 1c84f7aaa3d62..5923bc23d13e2 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofquarter.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofquarter.md @@ -1,5 +1,5 @@ --- -title: toStartOfQuarter +title: to_start_of_quarter --- Rounds down a date or date with time to the first day of the quarter. @@ -9,7 +9,7 @@ Returns the date. ## Syntax ```sql -toStartOfQuarter(expr) +to_start_of_quarter(expr) ``` ## Arguments @@ -24,16 +24,16 @@ Datetime object, returns date in “YYYY-MM-DD” format. ## Examples ```sql -SELECT toStartOfQuarter(to_date(18869)); +SELECT to_start_of_quarter(to_date(18869)); +---------------------------------+ -| toStartOfQuarter(to_date(18869)) | +| to_start_of_quarter(to_date(18869)) | +---------------------------------+ | 2021-07-01 | +---------------------------------+ -SELECT toStartOfQuarter(to_datetime(1630812366)); +SELECT to_start_of_quarter(to_datetime(1630812366)); +------------------------------------------+ -| toStartOfQuarter(to_datetime(1630812366)) | +| to_start_of_quarter(to_datetime(1630812366)) | +------------------------------------------+ | 2021-07-01 | +------------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofsecond.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofsecond.md index 5e94e9b5697c3..ad6ae29915227 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofsecond.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofsecond.md @@ -1,5 +1,5 @@ --- -title: toStartOfSecond +title: to_start_of_second --- Rounds down a date with time to the start of the second. @@ -7,7 +7,7 @@ Rounds down a date with time to the start of the second. ## Syntax ```sql -toStartOfSecond(expr) +to_start_of_second(expr) ``` ## Arguments @@ -23,9 +23,9 @@ Datetime object, returns date in “YYYY-MM-DD hh:mm:ss” format. ## Examples ```sql -SELECT toStartOfSecond(now());; +SELECT to_start_of_second(now());; +------------------------+ -| toStartOfSecond(now()) | +| to_start_of_second(now()) | +------------------------+ | 2022-04-13 13:53:47 | +------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoftenminutes.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoftenminutes.md index f6df378e6cfec..31d3f6ce8c3c6 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoftenminutes.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartoftenminutes.md @@ -1,5 +1,5 @@ --- -title: toStartOfTenMinutes +title: to_start_of_ten_minutes --- Rounds down a date with time to the start of the ten-minute interval. 
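Comparing the minute-granularity rounders on one timestamp makes the interval sizes easy to see. The expected values come from the examples in this patch (`to_datetime(1630812366)` is 2021-09-05 03:26:06) and assume the renamed rounders keep the original semantics.

```sql
SELECT to_start_of_five_minutes(to_datetime(1630812366)),
       to_start_of_ten_minutes(to_datetime(1630812366)),
       to_start_of_fifteen_minutes(to_datetime(1630812366));
-- expected: 2021-09-05 03:25:00, 2021-09-05 03:20:00, 2021-09-05 03:15:00
```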
@@ -7,7 +7,7 @@ Rounds down a date with time to the start of the ten-minute interval. ## Syntax ```sql -toStartOfTenMinutes(expr) +to_start_of_ten_minutes(expr) ``` ## Arguments @@ -23,16 +23,16 @@ Datetime object, returns date in “YYYY-MM-DD hh:mm:ss” format. ## Examples ```sql -SELECT toStartOfTenMinutes(now()); +SELECT to_start_of_ten_minutes(now()); +----------------------------+ -| toStartOfTenMinutes(now()) | +| to_start_of_ten_minutes(now()) | +----------------------------+ | 2022-03-29 06:40:00 | +----------------------------+ -SELECT toStartOfTenMinutes(to_datetime(1630812366)); +SELECT to_start_of_ten_minutes(to_datetime(1630812366)); +---------------------------------------------+ -| toStartOfTenMinutes(to_datetime(1630812366)) | +| to_start_of_ten_minutes(to_datetime(1630812366)) | +---------------------------------------------+ | 2021-09-05 03:20:00 | +---------------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofweek.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofweek.md index b297be218e3c7..81a738b0da606 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofweek.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofweek.md @@ -1,5 +1,5 @@ --- -title: toStartOfWeek +title: to_start_of_week --- Returns the first day of the year for a date or a date with time. @@ -7,7 +7,7 @@ Returns the first day of the year for a date or a date with time. ## Syntax ```sql -toStartOfWeek(expr) +to_start_of_week(expr) ``` ## Arguments @@ -23,16 +23,16 @@ Datetime object, returns date in “YYYY-MM-DD” format. ## Examples ```sql -SELECT toStartOfWeek(now()); +SELECT to_start_of_week(now()); +----------------------+ -| toStartOfWeek(now()) | +| to_start_of_week(now()) | +----------------------+ | 2022-03-27 | +----------------------+ -SELECT toStartOfWeek(to_datetime(1630812366)); +SELECT to_start_of_week(to_datetime(1630812366)); +---------------------------------------+ -| toStartOfWeek(to_datetime(1630812366)) | +| to_start_of_week(to_datetime(1630812366)) | +---------------------------------------+ | 2021-09-05 | +---------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofyear.md b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofyear.md index 65bfdcc5b5d3a..9996050547f62 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofyear.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/tostartofyear.md @@ -1,5 +1,5 @@ --- -title: toStartOfYear +title: to_start_of_year --- Returns the first day of the year for a date or a date with time. @@ -7,7 +7,7 @@ Returns the first day of the year for a date or a date with time. ## Syntax ```sql -toStartOfYear( ) +to_start_of_year( ) ``` ## Arguments @@ -23,16 +23,16 @@ Datetime object, returns date in “YYYY-MM-DD” format. 
## Examples ``` -SELECT toStartOfYear(to_date(18869)); +SELECT to_start_of_year(to_date(18869)); +------------------------------+ -| toStartOfYear(to_date(18869)) | +| to_start_of_year(to_date(18869)) | +------------------------------+ | 2021-01-01 | +------------------------------+ -SELECT toStartOfYear(to_datetime(1630812366)); +SELECT to_start_of_year(to_datetime(1630812366)); +---------------------------------------+ -| toStartOfYear(to_datetime(1630812366)) | +| to_start_of_year(to_datetime(1630812366)) | +---------------------------------------+ | 2021-01-01 | +---------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/toyear.md b/docs/doc/30-reference/20-functions/30-datetime-functions/toyear.md index 81ad0ca78426b..d441244d0fec8 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/toyear.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/toyear.md @@ -1,5 +1,5 @@ --- -title: TOYEAR +title: to_year --- Converts a date or date with time to a UInt16 number containing the year number (AD). @@ -7,7 +7,7 @@ Converts a date or date with time to a UInt16 number containing the year number ## Syntax ```sql -TOYEAR( ) +to_year( ) ``` ## Arguments @@ -23,16 +23,16 @@ A `UInt16` date type value ## Examples ```sql -SELECT toyear(now()); +SELECT to_year(now()); +---------------+ -| toyear(now()) | +| to_year(now()) | +---------------+ | 2022 | +---------------+ -SELECT toyear(to_datetime(1)); +SELECT to_year(to_datetime(1)); +-----------------------+ -| toyear(to_datetime(1)) | +| to_year(to_datetime(1)) | +-----------------------+ | 1970 | +-----------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymm.md b/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymm.md index 7361da9dc001a..81f0ae0328255 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymm.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymm.md @@ -1,5 +1,5 @@ --- -title: toYYYYMM +title: to_yyyymm --- Converts a date or date with time to a UInt32 number containing the year and month number. @@ -7,7 +7,7 @@ Converts a date or date with time to a UInt32 number containing the year and mon ## Syntax ```sql -toYYYYMM( ) +to_yyyymm( ) ``` ## Arguments @@ -30,9 +30,9 @@ SELECT to_date(18875); | 2021-09-05 | +---------------+ -SELECT toYYYYMM(to_date(18875)); +SELECT to_yyyymm(to_date(18875)); +-------------------------+ -| toYYYYMM(to_date(18875)) | +| to_yyyymm(to_date(18875)) | +-------------------------+ | 202109 | +-------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymmdd.md b/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymmdd.md index bfca0f44667f5..207656aade0a7 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymmdd.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymmdd.md @@ -1,12 +1,12 @@ --- -title: toYYYYMMDD +title: to_yyyymmdd --- Converts a date or date with time to a UInt32 number containing the year and month number (YYYY * 10000 + MM * 100 + DD). 
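The YYYY * 10000 + MM * 100 + DD encoding can be checked by hand. For 2021-09-05 the arithmetic works out as follows, matching the value the to_yyyymmdd examples in this patch show for `to_date(18875)`:

```sql
SELECT 2021 * 10000 + 9 * 100 + 5;
-- 20210905
```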
## Syntax ```sql -toYYYYMMDD( ) +to_yyyymmdd( ) ``` ## Arguments @@ -29,9 +29,9 @@ SELECT to_date(18875); | 2021-09-05 | +---------------+ -SELECT toYYYYMMDD(to_date(18875)); +SELECT to_yyyymmdd(to_date(18875)); +---------------------------+ -| toYYYYMMDD(to_date(18875)) | +| to_yyyymmdd(to_date(18875)) | +---------------------------+ | 20210905 | +---------------------------+ @@ -43,9 +43,9 @@ SELECT to_datetime(1630833797); | 2021-09-05 09:23:17 | +------------------------+ -SELECT toYYYYMMDD(to_datetime(1630833797)); +SELECT to_yyyymmdd(to_datetime(1630833797)); +------------------------------------+ -| toYYYYMMDD(to_datetime(1630833797)) | +| to_yyyymmdd(to_datetime(1630833797)) | +------------------------------------+ | 20210905 | +------------------------------------+ diff --git a/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymmddhhmmss.md b/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymmddhhmmss.md index a42f4e2c4fa6d..63fa811ccfde7 100644 --- a/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymmddhhmmss.md +++ b/docs/doc/30-reference/20-functions/30-datetime-functions/toyyyymmddhhmmss.md @@ -1,12 +1,12 @@ --- -title: toYYYYMMDDhhmmss +title: to_yyyymmddhhmmss --- Converts a date or date with time to a UInt64 number containing the year and month number (YYYY * 10000000000 + MM * 100000000 + DD * 1000000 + hh * 10000 + mm * 100 + ss). ## Syntax ```sql -toYYYYMMDDhhmmss( ) +to_yyyymmddhhmmss( ) ``` ## Arguments @@ -29,9 +29,9 @@ SELECT to_date(18875); | 2021-09-05 | +---------------+ -SELECT toYYYYMMDDhhmmss(to_date(18875)); +SELECT to_yyyymmddhhmmss(to_date(18875)); +---------------------------------+ -| toYYYYMMDDhhmmss(to_date(18875)) | +| to_yyyymmddhhmmss(to_date(18875)) | +---------------------------------+ | 20210905000000 | +---------------------------------+ @@ -43,9 +43,9 @@ SELECT to_datetime(1630833797); | 2021-09-05 09:23:17 | +------------------------+ -SELECT toYYYYMMDDhhmmss(to_datetime(1630833797)); +SELECT to_yyyymmddhhmmss(to_datetime(1630833797)); +------------------------------------------+ -| toYYYYMMDDhhmmss(to_datetime(1630833797)) | +| to_yyyymmddhhmmss(to_datetime(1630833797)) | +------------------------------------------+ | 20210905092317 | +------------------------------------------+ From 7a2f2cc19e69ed8a125d56d78b55867732736c30 Mon Sep 17 00:00:00 2001 From: elijah Date: Tue, 9 Aug 2022 22:00:47 +0800 Subject: [PATCH 32/59] chore: rename datetime functions in test suites --- .../src/analyzer/analyzer_expr_sync.rs | 12 +- .../70-system-tables/system-functions.md | 22 +- query/src/sql/planner/semantic/type_check.rs | 16 +- query/src/sql/statements/analyzer_expr.rs | 12 +- .../02_function/02_0012_function_datetimes | 204 +++++++++--------- .../02_function/02_0012_function_datetimes_tz | 76 +++---- .../base/06_show/06_0005_show_functions | 13 +- .../base/06_show/06_0005_show_functions_v2 | 12 +- .../suites/0_stateless/13_tpch/13_0001_q1.sql | 2 +- .../suites/0_stateless/13_tpch/13_0004_q4.sql | 2 +- .../suites/0_stateless/13_tpch/13_0005_q5.sql | 2 +- .../0_stateless/13_tpch/13_0010_q10.sql | 2 +- .../0_stateless/13_tpch/13_0014_q14.sql | 2 +- .../0_stateless/13_tpch/13_0020_q20.sql | 2 +- 14 files changed, 186 insertions(+), 193 deletions(-) diff --git a/common/legacy-parser/src/analyzer/analyzer_expr_sync.rs b/common/legacy-parser/src/analyzer/analyzer_expr_sync.rs index bd19cc6222a7c..478cb5a1316d3 100644 --- a/common/legacy-parser/src/analyzer/analyzer_expr_sync.rs +++ 
b/common/legacy-parser/src/analyzer/analyzer_expr_sync.rs @@ -531,22 +531,22 @@ impl ExprRPNBuilder { Expr::Extract { field, .. } => match field { DateTimeField::Year => self .rpn - .push(ExprRPNItem::function(String::from("toYear"), 1)), + .push(ExprRPNItem::function(String::from("to_year"), 1)), DateTimeField::Month => self .rpn - .push(ExprRPNItem::function(String::from("toMonth"), 1)), + .push(ExprRPNItem::function(String::from("to_month"), 1)), DateTimeField::Day => self .rpn - .push(ExprRPNItem::function(String::from("toDayOfMonth"), 1)), + .push(ExprRPNItem::function(String::from("to_day_of_month"), 1)), DateTimeField::Hour => self .rpn - .push(ExprRPNItem::function(String::from("toHour"), 1)), + .push(ExprRPNItem::function(String::from("to_hour"), 1)), DateTimeField::Minute => self .rpn - .push(ExprRPNItem::function(String::from("toMinute"), 1)), + .push(ExprRPNItem::function(String::from("to_minute"), 1)), DateTimeField::Second => self .rpn - .push(ExprRPNItem::function(String::from("toSecond"), 1)), + .push(ExprRPNItem::function(String::from("to_second"), 1)), }, Expr::MapAccess { keys, .. } => { self.rpn.push(ExprRPNItem::MapAccess(keys.to_owned())); diff --git a/docs/doc/30-reference/30-sql/70-system-tables/system-functions.md b/docs/doc/30-reference/30-sql/70-system-tables/system-functions.md index 6f6adcebe038a..5ab26c63ad40b 100644 --- a/docs/doc/30-reference/30-sql/70-system-tables/system-functions.md +++ b/docs/doc/30-reference/30-sql/70-system-tables/system-functions.md @@ -92,24 +92,24 @@ is_aggregate: 0 syntax: example: *************************** 6. row *************************** - name: tostartofday + name: to_start_of_day is_builtin: 1 is_aggregate: 0 definition: category: datetime description: Rounds down a date with time to the start of the day. - syntax: toStartOfDay(expr) + syntax: to_start_of_day(expr) - example: SELECT toStartOfDay(now()); + example: SELECT to_start_of_day(now()); +---------------------+ -| toStartOfDay(now()) | +| to_start_of_day(now()) | +---------------------+ | 2022-03-29 00:00:00 | +---------------------+ -SELECT toStartOfDay(to_datetime(1630812366)); +SELECT to_start_of_day(to_datetime(1630812366)); +--------------------------------------+ -| toStartOfDay(to_datetime(1630812366)) | +| to_start_of_day(to_datetime(1630812366)) | +--------------------------------------+ | 2021-09-05 00:00:00 | +--------------------------------------+ @@ -177,18 +177,18 @@ is_aggregate: 0 definition: category: datetime description: Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). 
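Because EXTRACT is rewritten into these functions during analysis, the SQL-standard syntax should keep working after the rename. The sketch below is an assumption-laden illustration: it presumes `to_date(18875)` still resolves to 2021-09-05 and that the EXTRACT rewrite behaves exactly like calling the renamed function directly.

```sql
SELECT EXTRACT(MONTH FROM to_date(18875)), to_month(to_date(18875));
-- both columns should return 9
```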
- syntax: toSecond(expr) + syntax: to_second(expr) - example: SELECT toSecond(now()); + example: SELECT to_second(now()); +-----------------+ -| toSecond(now()) | +| to_second(now()) | +-----------------+ | 14 | +-----------------+ -SELECT toSecond(to_datetime(1630812366)); +SELECT to_second(to_datetime(1630812366)); +----------------------------------+ -| toSecond(to_datetime(1630812366)) | +| to_second(to_datetime(1630812366)) | +----------------------------------+ | 6 | +----------------------------------+ diff --git a/query/src/sql/planner/semantic/type_check.rs b/query/src/sql/planner/semantic/type_check.rs index 5b916a1ea7ea0..a6a944427fa3c 100644 --- a/query/src/sql/planner/semantic/type_check.rs +++ b/query/src/sql/planner/semantic/type_check.rs @@ -1074,38 +1074,38 @@ impl<'a> TypeChecker<'a> { ) -> Result> { match interval_kind { IntervalKind::Year => { - self.resolve_function(span, "toYear", &[arg], Some(TimestampType::new_impl(0))) + self.resolve_function(span, "to_year", &[arg], Some(TimestampType::new_impl(0))) .await } IntervalKind::Month => { - self.resolve_function(span, "toMonth", &[arg], Some(TimestampType::new_impl(0))) + self.resolve_function(span, "to_month", &[arg], Some(TimestampType::new_impl(0))) .await } IntervalKind::Day => { self.resolve_function( span, - "toDayOfMonth", + "to_day_of_month", &[arg], Some(TimestampType::new_impl(0)), ) .await } IntervalKind::Hour => { - self.resolve_function(span, "toHour", &[arg], Some(TimestampType::new_impl(0))) + self.resolve_function(span, "to_hour", &[arg], Some(TimestampType::new_impl(0))) .await } IntervalKind::Minute => { - self.resolve_function(span, "toMinute", &[arg], Some(TimestampType::new_impl(0))) + self.resolve_function(span, "to_minute", &[arg], Some(TimestampType::new_impl(0))) .await } IntervalKind::Second => { - self.resolve_function(span, "toSecond", &[arg], Some(TimestampType::new_impl(0))) + self.resolve_function(span, "to_second", &[arg], Some(TimestampType::new_impl(0))) .await } IntervalKind::Doy => { self.resolve_function( span, - "toDayOfYear", + "to_day_of_year", &[arg], Some(TimestampType::new_impl(0)), ) @@ -1114,7 +1114,7 @@ impl<'a> TypeChecker<'a> { IntervalKind::Dow => { self.resolve_function( span, - "toDayOfWeek", + "to_day_of_week", &[arg], Some(TimestampType::new_impl(0)), ) diff --git a/query/src/sql/statements/analyzer_expr.rs b/query/src/sql/statements/analyzer_expr.rs index d9b5c2b896f26..879a7308a3a0a 100644 --- a/query/src/sql/statements/analyzer_expr.rs +++ b/query/src/sql/statements/analyzer_expr.rs @@ -815,22 +815,22 @@ impl ExprRPNBuilder { Expr::Extract { field, .. 
} => match field { DateTimeField::Year => self .rpn - .push(ExprRPNItem::function(String::from("toYear"), 1)), + .push(ExprRPNItem::function(String::from("to_year"), 1)), DateTimeField::Month => self .rpn - .push(ExprRPNItem::function(String::from("toMonth"), 1)), + .push(ExprRPNItem::function(String::from("to_month"), 1)), DateTimeField::Day => self .rpn - .push(ExprRPNItem::function(String::from("toDayOfMonth"), 1)), + .push(ExprRPNItem::function(String::from("to_day_of_month"), 1)), DateTimeField::Hour => self .rpn - .push(ExprRPNItem::function(String::from("toHour"), 1)), + .push(ExprRPNItem::function(String::from("to_hour"), 1)), DateTimeField::Minute => self .rpn - .push(ExprRPNItem::function(String::from("toMinute"), 1)), + .push(ExprRPNItem::function(String::from("to_minute"), 1)), DateTimeField::Second => self .rpn - .push(ExprRPNItem::function(String::from("toSecond"), 1)), + .push(ExprRPNItem::function(String::from("to_second"), 1)), }, Expr::MapAccess { keys, .. } => { self.rpn.push(ExprRPNItem::MapAccess(keys.to_owned())); diff --git a/tests/logictest/suites/base/02_function/02_0012_function_datetimes b/tests/logictest/suites/base/02_function/02_0012_function_datetimes index ae315919b580b..1dd98b552e7b7 100644 --- a/tests/logictest/suites/base/02_function/02_0012_function_datetimes +++ b/tests/logictest/suites/base/02_function/02_0012_function_datetimes @@ -202,19 +202,19 @@ select tomorrow() - today() = 1; statement query I -select toYYYYMM(to_datetime(1630833797000000)); +select to_yyyymm(to_datetime(1630833797000000)); ---- 202109 statement query I -select toYYYYMM(to_date(18875)); +select to_yyyymm(to_date(18875)); ---- 202109 statement query B -select toYYYYMM(to_datetime(1630833797000000)) = 202109; +select to_yyyymm(to_datetime(1630833797000000)) = 202109; ---- 1 @@ -222,7 +222,7 @@ select toYYYYMM(to_datetime(1630833797000000)) = 202109; statement query B -select toYYYYMM(to_date(18875)) = 202109; +select to_yyyymm(to_date(18875)) = 202109; ---- 1 @@ -230,43 +230,43 @@ select toYYYYMM(to_date(18875)) = 202109; statement query T -select timeSlot(to_datetime(1630320462000000)); +select time_slot(to_datetime(1630320462000000)); ---- 2021-08-30 10:30:00 statement query T -select toStartOfHour(to_datetime(1630320462000000)); +select to_start_of_hour(to_datetime(1630320462000000)); ---- 2021-08-30 10:00:00 statement query T -select toStartOfFifteenMinutes(to_datetime(1630320462000000)); +select to_start_of_fifteen_minutes(to_datetime(1630320462000000)); ---- 2021-08-30 10:45:00 statement query T -select toStartOfMinute(to_datetime(1630320462000000)); +select to_start_of_minute(to_datetime(1630320462000000)); ---- 2021-08-30 10:47:00 statement query T -select toStartOfFiveMinutes(to_datetime(1630320462000000)); +select to_start_of_five_minutes(to_datetime(1630320462000000)); ---- 2021-08-30 10:45:00 statement query T -select toStartOfTenMinutes(to_datetime(1630320462000000)); +select to_start_of_ten_minutes(to_datetime(1630320462000000)); ---- 2021-08-30 10:40:00 statement query B -select timeSlot(now()) <= now(); +select time_slot(now()) <= now(); ---- 1 @@ -274,19 +274,19 @@ select timeSlot(now()) <= now(); statement query I -select toYYYYMMDDhhmmss(to_datetime(1630833797000000)); +select to_yyyymmddhhmmss(to_datetime(1630833797000000)); ---- 20210905092317 statement query I -select toYYYYMMDDhhmmss(to_date(18875)); +select to_yyyymmddhhmmss(to_date(18875)); ---- 20210905000000 statement query B -select toYYYYMMDDhhmmss(to_datetime(1630833797000000)) = 20210905092317; 
+select to_yyyymmddhhmmss(to_datetime(1630833797000000)) = 20210905092317; ---- 1 @@ -294,7 +294,7 @@ select toYYYYMMDDhhmmss(to_datetime(1630833797000000)) = 20210905092317; statement query B -select toYYYYMMDDhhmmss(to_date(18875)) = 20210905000000; +select to_yyyymmddhhmmss(to_date(18875)) = 20210905000000; ---- 1 @@ -302,19 +302,19 @@ select toYYYYMMDDhhmmss(to_date(18875)) = 20210905000000; statement query I -select toYYYYMMDD(to_datetime(1630833797000000)); +select to_yyyymmdd(to_datetime(1630833797000000)); ---- 20210905 statement query I -select toYYYYMMDD(to_date(18875)); +select to_yyyymmdd(to_date(18875)); ---- 20210905 statement query B -select toYYYYMMDD(to_datetime(1630833797000000)) = 20210905; +select to_yyyymmdd(to_datetime(1630833797000000)) = 20210905; ---- 1 @@ -322,7 +322,7 @@ select toYYYYMMDD(to_datetime(1630833797000000)) = 20210905; statement query B -select toYYYYMMDD(to_date(18875)) = 20210905; +select to_yyyymmdd(to_date(18875)) = 20210905; ---- 1 @@ -330,361 +330,361 @@ select toYYYYMMDD(to_date(18875)) = 20210905; statement query T -select toStartOfYear(to_datetime(1630812366000000)); +select to_start_of_year(to_datetime(1630812366000000)); ---- 2021-01-01 statement query T -select toStartOfISOYear(to_datetime(1630812366000000)); +select to_start_of_iso_year(to_datetime(1630812366000000)); ---- 2021-01-04 statement query T -select toStartOfYear(to_date(18869)); +select to_start_of_year(to_date(18869)); ---- 2021-01-01 statement query T -select toStartOfISOYear(to_date(18869)); +select to_start_of_iso_year(to_date(18869)); ---- 2021-01-04 statement query T -select toStartOfQuarter(to_datetime(1631705259000000)); +select to_start_of_quarter(to_datetime(1631705259000000)); ---- 2021-07-01 statement query T -select toStartOfQuarter(to_datetime(1621078059000000)); +select to_start_of_quarter(to_datetime(1621078059000000)); ---- 2021-04-01 statement query T -select toStartOfMonth(to_datetime(1631705259000000)); +select to_start_of_month(to_datetime(1631705259000000)); ---- 2021-09-01 statement query T -select toStartOfQuarter(to_date(18885)); +select to_start_of_quarter(to_date(18885)); ---- 2021-07-01 statement query T -select toStartOfQuarter(to_date(18762)); +select to_start_of_quarter(to_date(18762)); ---- 2021-04-01 statement query T -select toStartOfMonth(to_date(18885)); +select to_start_of_month(to_date(18885)); ---- 2021-09-01 statement query T -select toStartOfWeek(to_datetime(1632397739000000)); +select to_start_of_week(to_datetime(1632397739000000)); ---- 2021-09-19 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 0); +select to_start_of_week(to_datetime(1632397739000000), 0); ---- 2021-09-19 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 1); +select to_start_of_week(to_datetime(1632397739000000), 1); ---- 2021-09-20 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 2); +select to_start_of_week(to_datetime(1632397739000000), 2); ---- 2021-09-19 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 3); +select to_start_of_week(to_datetime(1632397739000000), 3); ---- 2021-09-20 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 4); +select to_start_of_week(to_datetime(1632397739000000), 4); ---- 2021-09-19 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 5); +select to_start_of_week(to_datetime(1632397739000000), 5); ---- 2021-09-20 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 6); +select 
to_start_of_week(to_datetime(1632397739000000), 6); ---- 2021-09-19 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 7); +select to_start_of_week(to_datetime(1632397739000000), 7); ---- 2021-09-20 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 8); +select to_start_of_week(to_datetime(1632397739000000), 8); ---- 2021-09-19 statement query T -select toStartOfWeek(to_datetime(1632397739000000), 9); +select to_start_of_week(to_datetime(1632397739000000), 9); ---- 2021-09-20 statement query T -select toStartOfWeek(to_date(18769)); +select to_start_of_week(to_date(18769)); ---- 2021-05-16 statement query T -select toStartOfWeek(to_date(18769), 0); +select to_start_of_week(to_date(18769), 0); ---- 2021-05-16 statement query T -select toStartOfWeek(to_date(18769), 1); +select to_start_of_week(to_date(18769), 1); ---- 2021-05-17 statement query T -select toStartOfWeek(to_date(18769), 2); +select to_start_of_week(to_date(18769), 2); ---- 2021-05-16 statement query T -select toStartOfWeek(to_date(18769), 3); +select to_start_of_week(to_date(18769), 3); ---- 2021-05-17 statement query T -select toStartOfWeek(to_date(18769), 4); +select to_start_of_week(to_date(18769), 4); ---- 2021-05-16 statement query T -select toStartOfWeek(to_date(18769), 5); +select to_start_of_week(to_date(18769), 5); ---- 2021-05-17 statement query T -select toStartOfWeek(to_date(18769), 6); +select to_start_of_week(to_date(18769), 6); ---- 2021-05-16 statement query T -select toStartOfWeek(to_date(18769), 7); +select to_start_of_week(to_date(18769), 7); ---- 2021-05-17 statement query T -select toStartOfWeek(to_date(18769), 8); +select to_start_of_week(to_date(18769), 8); ---- 2021-05-16 statement query T -select toStartOfWeek(to_date(18769), 9); +select to_start_of_week(to_date(18769), 9); ---- 2021-05-17 statement error 1079 -select toStartOfWeek(to_date('1000-01-01')); +select to_start_of_week(to_date('1000-01-01')); statement error 1079 -select toStartOfWeek(to_datetime('1000-01-01 00:00:00')); +select to_start_of_week(to_datetime('1000-01-01 00:00:00')); statement query T -select addYears(to_date(18321), cast(1, UINT8)); -- 2020-2-29 + 1 year +select add_years(to_date(18321), cast(1, UINT8)); -- 2020-2-29 + 1 year ---- 2021-02-28 statement query T -select addYears(to_date(18321), cast(1, UINT16)); +select add_years(to_date(18321), cast(1, UINT16)); ---- 2021-02-28 statement query T -select addYears(to_date(18321), cast(1, UINT32)); +select add_years(to_date(18321), cast(1, UINT32)); ---- 2021-02-28 statement query T -select addYears(to_date(18321), cast(1, UINT64)); +select add_years(to_date(18321), cast(1, UINT64)); ---- 2021-02-28 statement query T -select addYears(to_date(18321), cast(-1, INT8)); +select add_years(to_date(18321), cast(-1, INT8)); ---- 2019-02-28 statement query T -select addYears(to_date(18321), cast(-1, INT16)); +select add_years(to_date(18321), cast(-1, INT16)); ---- 2019-02-28 statement query T -select addYears(to_date(18321), cast(-1, INT32)); +select add_years(to_date(18321), cast(-1, INT32)); ---- 2019-02-28 statement query T -select addYears(to_date(18321), cast(-1, INT64)); +select add_years(to_date(18321), cast(-1, INT64)); ---- 2019-02-28 statement query T -select addYears(to_datetime(1582970400000000), cast(50, INT8)); -- 2020-2-29T10:00:00 + 50 years +select add_years(to_datetime(1582970400000000), cast(50, INT8)); -- 2020-2-29T10:00:00 + 50 years ---- 2070-02-28 10:00:00.000000 statement query T -select addYears(to_datetime(1582970400000000), 
cast(-50, INT8)); -- 2020-2-29T10:00:00 - 50 years +select add_years(to_datetime(1582970400000000), cast(-50, INT8)); -- 2020-2-29T10:00:00 - 50 years ---- 1970-02-28 10:00:00.000000 statement error 1079 -select addYears(to_date('9999-12-31'), 1); +select add_years(to_date('9999-12-31'), 1); statement error 1080 -select addYears(to_datetime('9999-12-31 23:59:59'), 1); +select add_years(to_datetime('9999-12-31 23:59:59'), 1); statement query T -select subtractMonths(to_date(18321), cast(13, INT16)); -- 2020-2-29 - 13 months +select subtract_months(to_date(18321), cast(13, INT16)); -- 2020-2-29 - 13 months ---- 2019-01-29 statement query T -select subtractMonths(to_datetime(1582970400000000), cast(122, INT16)); -- 2020-2-29T10:00:00 - (12*10 + 2) months +select subtract_months(to_datetime(1582970400000000), cast(122, INT16)); -- 2020-2-29T10:00:00 - (12*10 + 2) months ---- 2009-12-29 10:00:00.000000 statement error 1079 -select subtractMonths(to_date('1000-01-01'), 1); +select subtract_months(to_date('1000-01-01'), 1); statement error 1080 -select subtractMonths(to_datetime('1000-01-01 00:00:00'), 1); +select subtract_months(to_datetime('1000-01-01 00:00:00'), 1); statement query T -select addDays(to_date(18321), cast(1, INT16)); -- 2020-2-29 + 1 day +select add_days(to_date(18321), cast(1, INT16)); -- 2020-2-29 + 1 day ---- 2020-03-01 statement query T -select addDays(to_datetime(1582970400000000), cast(-1, INT16)); -- 2020-2-29T10:00:00 - 1 day +select add_days(to_datetime(1582970400000000), cast(-1, INT16)); -- 2020-2-29T10:00:00 - 1 day ---- 2020-02-28 10:00:00.000000 statement error 1079 -select addDays(to_date('9999-12-31'), 1); +select add_days(to_date('9999-12-31'), 1); statement error 1080 -select addDays(to_datetime('9999-12-31 23:59:59'), 1); +select add_days(to_datetime('9999-12-31 23:59:59'), 1); statement query T -select addHours(to_datetime(1582970400000000), cast(25, INT32)); -- 2020-2-29T10:00:00 + 25 hours +select add_hours(to_datetime(1582970400000000), cast(25, INT32)); -- 2020-2-29T10:00:00 + 25 hours ---- 2020-03-01 11:00:00.000000 statement query T -select addHours(to_date(18321), cast(1.2, Float32)); +select add_hours(to_date(18321), cast(1.2, Float32)); ---- 2020-02-29 01:00:00.000000 statement error 1080 -select addHours(to_date('9999-12-31'), 24); +select add_hours(to_date('9999-12-31'), 24); statement error 1080 -select addHours(to_datetime('9999-12-31 23:59:59'), 1); +select add_hours(to_datetime('9999-12-31 23:59:59'), 1); statement query T -select subtractMinutes(to_datetime(1582970400000000), cast(1, INT32)); -- 2020-2-29T10:00:00 - 1 minutes +select subtract_minutes(to_datetime(1582970400000000), cast(1, INT32)); -- 2020-2-29T10:00:00 - 1 minutes ---- 2020-02-29 09:59:00.000000 statement error 1080 -select subtractMinutes(to_date('1000-01-01'), 1); +select subtract_minutes(to_date('1000-01-01'), 1); statement error 1080 -select subtractMinutes(to_datetime('1000-01-01 00:00:00'), 1); +select subtract_minutes(to_datetime('1000-01-01 00:00:00'), 1); statement query T -select addSeconds(to_datetime(1582970400000000), cast(61, INT32)); -- 2020-2-29T10:00:00 + 61 seconds +select add_seconds(to_datetime(1582970400000000), cast(61, INT32)); -- 2020-2-29T10:00:00 + 61 seconds ---- 2020-02-29 10:01:01.000000 statement query I -select toMonth(to_datetime(1633081817000000)); +select to_month(to_datetime(1633081817000000)); ---- 10 statement query I -select toMonth(to_date(18901)); +select to_month(to_date(18901)); ---- 10 statement query B -select 
toMonth(to_datetime(1633081817000000)) = 10; +select to_month(to_datetime(1633081817000000)) = 10; ---- 1 @@ -692,7 +692,7 @@ select toMonth(to_datetime(1633081817000000)) = 10; statement query B -select toMonth(to_date(18901)) = 10; +select to_month(to_date(18901)) = 10; ---- 1 @@ -700,19 +700,19 @@ select toMonth(to_date(18901)) = 10; statement query I -select toDayOfYear(to_datetime(1633173324000000)); +select to_day_of_year(to_datetime(1633173324000000)); ---- 275 statement query I -select toDayOfYear(to_date(18902)); +select to_day_of_year(to_date(18902)); ---- 275 statement query B -select toDayOfYear(to_datetime(1633173324000000)) = 275; +select to_day_of_year(to_datetime(1633173324000000)) = 275; ---- 1 @@ -720,7 +720,7 @@ select toDayOfYear(to_datetime(1633173324000000)) = 275; statement query B -select toDayOfYear(to_date(18902)) = 275; +select to_day_of_year(to_date(18902)) = 275; ---- 1 @@ -728,19 +728,19 @@ select toDayOfYear(to_date(18902)) = 275; statement query I -select toDayOfMonth(to_datetime(1633173324000000)); +select to_day_of_month(to_datetime(1633173324000000)); ---- 2 statement query I -select toDayOfMonth(to_date(18902)); +select to_day_of_month(to_date(18902)); ---- 2 statement query B -select toDayOfMonth(to_datetime(1633173324000000)) = 2; +select to_day_of_month(to_datetime(1633173324000000)) = 2; ---- 1 @@ -748,7 +748,7 @@ select toDayOfMonth(to_datetime(1633173324000000)) = 2; statement query B -select toDayOfMonth(to_date(18902)) = 2; +select to_day_of_month(to_date(18902)) = 2; ---- 1 @@ -756,19 +756,19 @@ select toDayOfMonth(to_date(18902)) = 2; statement query I -select toDayOfWeek(to_datetime(1633173324000000)); +select to_day_of_week(to_datetime(1633173324000000)); ---- 6 statement query I -select toDayOfWeek(to_date(18902)); +select to_day_of_week(to_date(18902)); ---- 6 statement query B -select toDayOfWeek(to_datetime(1633173324000000)) = 6; +select to_day_of_week(to_datetime(1633173324000000)) = 6; ---- 1 @@ -776,7 +776,7 @@ select toDayOfWeek(to_datetime(1633173324000000)) = 6; statement query B -select toDayOfWeek(to_date(18902)) = 6; +select to_day_of_week(to_date(18902)) = 6; ---- 1 @@ -784,7 +784,7 @@ select toDayOfWeek(to_date(18902)) = 6; statement query B -select toHour(to_datetime(1634551542000000)) = 10; +select to_hour(to_datetime(1634551542000000)) = 10; ---- 1 @@ -792,7 +792,7 @@ select toHour(to_datetime(1634551542000000)) = 10; statement query B -select toMinute(to_datetime(1634551542000000)) = 5; +select to_minute(to_datetime(1634551542000000)) = 5; ---- 1 @@ -800,7 +800,7 @@ select toMinute(to_datetime(1634551542000000)) = 5; statement query B -select toSecond(to_datetime(1634551542000000)) = 42; +select to_second(to_datetime(1634551542000000)) = 42; ---- 1 @@ -808,7 +808,7 @@ select toSecond(to_datetime(1634551542000000)) = 42; statement query B -select toMonday(to_datetime(1634614318000000)) = to_date('2021-10-18'); +select to_monday(to_datetime(1634614318000000)) = to_date('2021-10-18'); ---- 1 @@ -816,7 +816,7 @@ select toMonday(to_datetime(1634614318000000)) = to_date('2021-10-18'); statement query B -select toYear(to_datetime(1646404329000000)) = 2022; +select to_year(to_datetime(1646404329000000)) = 2022; ---- 1 diff --git a/tests/logictest/suites/base/02_function/02_0012_function_datetimes_tz b/tests/logictest/suites/base/02_function/02_0012_function_datetimes_tz index 562b66af4806a..355c51e08b44e 100644 --- a/tests/logictest/suites/base/02_function/02_0012_function_datetimes_tz +++ 
b/tests/logictest/suites/base/02_function/02_0012_function_datetimes_tz @@ -69,49 +69,49 @@ statement ok set timezone = 'UTC'; statement query I -select toyyyymm(to_timestamp(1619820000000000)); +select to_yyyymm(to_timestamp(1619820000000000)); ---- 202104 statement query I -select toyyyymmdd(to_timestamp(1619820000000000)); +select to_yyyymmdd(to_timestamp(1619820000000000)); ---- 20210430 statement query I -select toyyyymmddhhmmss(to_timestamp(1619820000000000)); +select to_yyyymmddhhmmss(to_timestamp(1619820000000000)); ---- 20210430220000 statement query T -select tostartofmonth(to_timestamp(1619820000000000)); +select to_start_of_month(to_timestamp(1619820000000000)); ---- 2021-04-01 statement query I -select tomonth(to_timestamp(1619820000000000)); +select to_month(to_timestamp(1619820000000000)); ---- 4 statement query I -select todayofyear(to_timestamp(1619820000000000)); +select to_day_of_year(to_timestamp(1619820000000000)); ---- 120 statement query I -select todayofmonth(to_timestamp(1619820000000000)); +select to_day_of_month(to_timestamp(1619820000000000)); ---- 30 statement query I -select todayofweek(to_timestamp(1619820000000000)); +select to_day_of_week(to_timestamp(1619820000000000)); ---- 5 @@ -120,49 +120,49 @@ statement ok set timezone = 'Asia/Shanghai'; statement query I -select toyyyymm(to_timestamp(1619820000000000)); +select to_yyyymm(to_timestamp(1619820000000000)); ---- 202105 statement query I -select toyyyymmdd(to_timestamp(1619820000000000)); +select to_yyyymmdd(to_timestamp(1619820000000000)); ---- 20210501 statement query I -select toyyyymmddhhmmss(to_timestamp(1619820000000000)); +select to_yyyymmddhhmmss(to_timestamp(1619820000000000)); ---- 20210501060000 statement query T -select tostartofmonth(to_timestamp(1619820000000000)); +select to_start_of_month(to_timestamp(1619820000000000)); ---- 2021-05-01 statement query I -select tomonth(to_timestamp(1619820000000000)); +select to_month(to_timestamp(1619820000000000)); ---- 5 statement query I -select todayofyear(to_timestamp(1619820000000000)); +select to_day_of_year(to_timestamp(1619820000000000)); ---- 121 statement query I -select todayofmonth(to_timestamp(1619820000000000)); +select to_day_of_month(to_timestamp(1619820000000000)); ---- 1 statement query I -select todayofweek(to_timestamp(1619820000000000)); +select to_day_of_week(to_timestamp(1619820000000000)); ---- 6 @@ -177,55 +177,55 @@ statement ok set timezone = 'UTC'; statement query T -select tostartofsecond(to_timestamp(1619822911999000)); +select to_start_of_second(to_timestamp(1619822911999000)); ---- 2021-04-30 22:48:31 statement query T -select tostartofminute(to_timestamp(1619822911999000)); +select to_start_of_minute(to_timestamp(1619822911999000)); ---- 2021-04-30 22:48:00 statement query T -select tostartoffiveminutes(to_timestamp(1619822911999000)); +select to_start_of_five_minutes(to_timestamp(1619822911999000)); ---- 2021-04-30 22:45:00 statement query T -select tostartoftenminutes(to_timestamp(1619822911999000)); +select to_start_of_ten_minutes(to_timestamp(1619822911999000)); ---- 2021-04-30 22:40:00 statement query T -select tostartoffifteenminutes(to_timestamp(1619822911999000)); +select to_start_of_fifteen_minutes(to_timestamp(1619822911999000)); ---- 2021-04-30 22:45:00 statement query T -select timeslot(to_timestamp(1619822911999000)); +select time_slot(to_timestamp(1619822911999000)); ---- 2021-04-30 22:30:00 statement query T -select tostartofhour(to_timestamp(1619822911999000)); +select 
to_start_of_hour(to_timestamp(1619822911999000)); ---- 2021-04-30 22:00:00 statement query T -select tostartofday(to_timestamp(1619822911999000)); +select to_start_of_day(to_timestamp(1619822911999000)); ---- 2021-04-30 00:00:00 statement query T -select tostartofweek(to_timestamp(1619822911999000)); +select to_start_of_week(to_timestamp(1619822911999000)); ---- 2021-04-25 @@ -234,55 +234,55 @@ statement ok set timezone = 'Asia/Shanghai'; statement query T -select tostartofsecond(to_timestamp(1619822911999000)); +select to_start_of_second(to_timestamp(1619822911999000)); ---- 2021-05-01 06:48:31 statement query T -select tostartofminute(to_timestamp(1619822911999000)); +select to_start_of_minute(to_timestamp(1619822911999000)); ---- 2021-05-01 06:48:00 statement query T -select tostartoffiveminutes(to_timestamp(1619822911999000)); +select to_start_of_five_minutes(to_timestamp(1619822911999000)); ---- 2021-05-01 06:45:00 statement query T -select tostartoftenminutes(to_timestamp(1619822911999000)); +select to_start_of_ten_minutes(to_timestamp(1619822911999000)); ---- 2021-05-01 06:40:00 statement query T -select tostartoffifteenminutes(to_timestamp(1619822911999000)); +select to_start_of_fifteen_minutes(to_timestamp(1619822911999000)); ---- 2021-05-01 06:45:00 statement query T -select timeslot(to_timestamp(1619822911999000)); +select time_slot(to_timestamp(1619822911999000)); ---- 2021-05-01 06:30:00 statement query T -select tostartofhour(to_timestamp(1619822911999000)); +select to_start_of_hour(to_timestamp(1619822911999000)); ---- 2021-05-01 06:00:00 statement query T -select tostartofday(to_timestamp(1619822911999000)); +select to_start_of_day(to_timestamp(1619822911999000)); ---- 2021-05-01 00:00:00 statement query T -select tostartofweek(to_timestamp(1619822911999000)); +select to_start_of_week(to_timestamp(1619822911999000)); ---- 2021-04-25 @@ -291,7 +291,7 @@ statement ok set timezone = 'UTC'; statement query T -select addMonths(to_timestamp(1619822911999000), 1); +select add_months(to_timestamp(1619822911999000), 1); ---- 2021-05-30 22:48:31.999000 @@ -304,7 +304,7 @@ select to_timestamp(1583013600000000); statement query T -select addYears(to_timestamp(1583013600000000), 1); +select add_years(to_timestamp(1583013600000000), 1); ---- 2021-02-28 22:00:00.000000 @@ -314,7 +314,7 @@ statement ok set timezone = 'Asia/Shanghai'; statement query T -select addMonths(to_timestamp(1619822911999000), 1); +select add_months(to_timestamp(1619822911999000), 1); ---- 2021-06-01 14:48:31.999000 @@ -326,7 +326,7 @@ select to_timestamp(1583013600000000); 2020-03-01 06:00:00.000000 statement query T -select addYears(to_timestamp(1583013600000000), 1); +select add_years(to_timestamp(1583013600000000), 1); ---- 2021-03-01 14:00:00.000000 diff --git a/tests/logictest/suites/base/06_show/06_0005_show_functions b/tests/logictest/suites/base/06_show/06_0005_show_functions index 8c6072e498d72..a86fad638a910 100644 --- a/tests/logictest/suites/base/06_show/06_0005_show_functions +++ b/tests/logictest/suites/base/06_show/06_0005_show_functions @@ -3,18 +3,15 @@ SHOW FUNCTIONS LIKE 'today%'; ---- today 1 0 Returns current date. -todayofmonth 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). -todayofweek 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). 
-todayofyear 1 0 Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). statement query TBBTT -SHOW FUNCTIONS WHERE name LIKE 'today%'; +SHOW FUNCTIONS LIKE 'to_day%'; ---- -today 1 0 Returns current date. -todayofmonth 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). -todayofweek 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). -todayofyear 1 0 Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). +to_day_of_month 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). +to_day_of_week 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). +to_day_of_year 1 0 Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). + diff --git a/tests/logictest/suites/base/06_show/06_0005_show_functions_v2 b/tests/logictest/suites/base/06_show/06_0005_show_functions_v2 index 24f1878f5c799..6a24007fd6492 100644 --- a/tests/logictest/suites/base/06_show/06_0005_show_functions_v2 +++ b/tests/logictest/suites/base/06_show/06_0005_show_functions_v2 @@ -6,16 +6,12 @@ SHOW FUNCTIONS LIKE 'today%'; ---- today 1 0 Returns current date. -todayofmonth 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). -todayofweek 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). -todayofyear 1 0 Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). statement query TBBTT -SHOW FUNCTIONS WHERE name LIKE 'today%'; +SHOW FUNCTIONS WHERE name LIKE 'to_day%'; ---- -today 1 0 Returns current date. -todayofmonth 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). -todayofweek 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). -todayofyear 1 0 Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). +to_day_of_month 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). +to_day_of_week 1 0 Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). +to_day_of_year 1 0 Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). 
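The renames in the suites above only change the spelling of these scalar functions from camelCase to snake_case; the expected values in the tests stay the same. As a reference for what to_yyyymmdd and to_yyyymmddhhmmss return for the timestamp used in these tests, here is a minimal standalone sketch, written against the chrono crate (version 0.4 assumed) purely for illustration and not against Databend's own date kernels:

use chrono::{Datelike, NaiveDateTime, Timelike};

// yyyymmdd packed into one integer, e.g. 2021-09-05 -> 20210905.
fn to_yyyymmdd(ts: &NaiveDateTime) -> u32 {
    ts.year() as u32 * 10_000 + ts.month() * 100 + ts.day()
}

// yyyymmddhhmmss packed into one integer, e.g. 2021-09-05 09:23:17 -> 20210905092317.
fn to_yyyymmddhhmmss(ts: &NaiveDateTime) -> u64 {
    to_yyyymmdd(ts) as u64 * 1_000_000
        + ts.hour() as u64 * 10_000
        + ts.minute() as u64 * 100
        + ts.second() as u64
}

fn main() {
    // The logic tests pass 1630833797000000 microseconds; chrono takes seconds here.
    let ts = NaiveDateTime::from_timestamp_opt(1_630_833_797, 0).unwrap();
    assert_eq!(to_yyyymmdd(&ts), 20210905);
    assert_eq!(to_yyyymmddhhmmss(&ts), 20210905092317);
}

The two asserts mirror the expected results 20210905 and 20210905092317 from the logic tests above (UTC timezone).
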
diff --git a/tests/suites/0_stateless/13_tpch/13_0001_q1.sql b/tests/suites/0_stateless/13_tpch/13_0001_q1.sql index 32e79fa307464..13fd3647b2bc9 100644 --- a/tests/suites/0_stateless/13_tpch/13_0001_q1.sql +++ b/tests/suites/0_stateless/13_tpch/13_0001_q1.sql @@ -13,7 +13,7 @@ select from lineitem where - l_shipdate <= addDays(to_date('1998-12-01'), 90) + l_shipdate <= add_days(to_date('1998-12-01'), 90) group by l_returnflag, l_linestatus diff --git a/tests/suites/0_stateless/13_tpch/13_0004_q4.sql b/tests/suites/0_stateless/13_tpch/13_0004_q4.sql index f7e75fc9e3a56..4e79faeacaba6 100644 --- a/tests/suites/0_stateless/13_tpch/13_0004_q4.sql +++ b/tests/suites/0_stateless/13_tpch/13_0004_q4.sql @@ -6,7 +6,7 @@ from orders where o_orderdate >= to_date('1993-07-01') - and o_orderdate < addMonths(to_date('1993-07-01'), 3) + and o_orderdate < add_months(to_date('1993-07-01'), 3) and exists ( select * diff --git a/tests/suites/0_stateless/13_tpch/13_0005_q5.sql b/tests/suites/0_stateless/13_tpch/13_0005_q5.sql index e5359cf8cba7e..c27f0ba8bd141 100644 --- a/tests/suites/0_stateless/13_tpch/13_0005_q5.sql +++ b/tests/suites/0_stateless/13_tpch/13_0005_q5.sql @@ -18,7 +18,7 @@ where and n_regionkey = r_regionkey and r_name = 'ASIA' and o_orderdate >= to_date('1994-01-01') - and o_orderdate < addYears(to_date('1994-01-01'), 1) + and o_orderdate < add_years(to_date('1994-01-01'), 1) group by n_name order by diff --git a/tests/suites/0_stateless/13_tpch/13_0010_q10.sql b/tests/suites/0_stateless/13_tpch/13_0010_q10.sql index df48f38a056dc..0deb051d24978 100644 --- a/tests/suites/0_stateless/13_tpch/13_0010_q10.sql +++ b/tests/suites/0_stateless/13_tpch/13_0010_q10.sql @@ -17,7 +17,7 @@ where c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate >= to_date('1993-10-01') - and o_orderdate < addMonths(to_date('1993-10-01'), 3) + and o_orderdate < add_months(to_date('1993-10-01'), 3) and l_returnflag = 'R' and c_nationkey = n_nationkey group by diff --git a/tests/suites/0_stateless/13_tpch/13_0014_q14.sql b/tests/suites/0_stateless/13_tpch/13_0014_q14.sql index 6c45a65258aab..c740ce3f2469a 100644 --- a/tests/suites/0_stateless/13_tpch/13_0014_q14.sql +++ b/tests/suites/0_stateless/13_tpch/13_0014_q14.sql @@ -11,4 +11,4 @@ from where l_partkey = p_partkey and l_shipdate >= to_date('1995-09-01') - and l_shipdate < addMonths(to_date('1995-09-01'), 1); \ No newline at end of file + and l_shipdate < add_months(to_date('1995-09-01'), 1); \ No newline at end of file diff --git a/tests/suites/0_stateless/13_tpch/13_0020_q20.sql b/tests/suites/0_stateless/13_tpch/13_0020_q20.sql index 6adc0df6f3be9..7531987c11ca8 100644 --- a/tests/suites/0_stateless/13_tpch/13_0020_q20.sql +++ b/tests/suites/0_stateless/13_tpch/13_0020_q20.sql @@ -29,7 +29,7 @@ where l_partkey = ps_partkey and l_suppkey = ps_suppkey and l_shipdate >= to_date('1994-01-01') - and l_shipdate < addYears(to_date('1994-01-01'), 1) + and l_shipdate < add_years(to_date('1994-01-01'), 1) ) ) and s_nationkey = n_nationkey From 73734929edae654508c9939913f1e0f6fe57d8de Mon Sep 17 00:00:00 2001 From: leiysky Date: Wed, 10 Aug 2022 11:43:19 +0800 Subject: [PATCH 33/59] introduce AST visitor --- common/ast/src/ast/query.rs | 6 +- common/ast/src/lib.rs | 8 + common/ast/src/parser/query.rs | 2 +- common/ast/src/visitors/mod.rs | 23 + common/ast/src/visitors/visitor.rs | 500 ++++++++++++++++ common/ast/src/visitors/visitor_mut.rs | 562 ++++++++++++++++++ common/ast/src/visitors/walk.rs | 288 +++++++++ common/ast/src/visitors/walk_mut.rs | 294 
+++++++++ query/src/sql/planner/binder/ddl/table.rs | 11 +- query/src/sql/planner/mod.rs | 1 + query/src/sql/planner/semantic/mod.rs | 1 + .../sql/planner/semantic/name_resolution.rs | 12 + .../sql/planner/semantic/name_resolution.rs | 22 + .../base/15_query/case_sensitivity/ddl.test | 2 +- 14 files changed, 1726 insertions(+), 6 deletions(-) create mode 100644 common/ast/src/visitors/mod.rs create mode 100644 common/ast/src/visitors/visitor.rs create mode 100644 common/ast/src/visitors/visitor_mut.rs create mode 100644 common/ast/src/visitors/walk.rs create mode 100644 common/ast/src/visitors/walk_mut.rs diff --git a/common/ast/src/ast/query.rs b/common/ast/src/ast/query.rs index 3d81351a027e7..692daa1307f5a 100644 --- a/common/ast/src/ast/query.rs +++ b/common/ast/src/ast/query.rs @@ -45,11 +45,11 @@ pub struct Query<'a> { pub struct With<'a> { pub span: &'a [Token<'a>], pub recursive: bool, - pub ctes: Vec>, + pub ctes: Vec>, } #[derive(Debug, Clone, PartialEq)] -pub struct Cte<'a> { +pub struct CTE<'a> { pub span: &'a [Token<'a>], pub alias: TableAlias<'a>, pub query: Query<'a>, @@ -428,7 +428,7 @@ impl<'a> Display for SetExpr<'a> { } } -impl<'a> Display for Cte<'a> { +impl<'a> Display for CTE<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{} AS ({})", self.alias, self.query)?; Ok(()) diff --git a/common/ast/src/lib.rs b/common/ast/src/lib.rs index 02f57824d5f48..b558497921b15 100644 --- a/common/ast/src/lib.rs +++ b/common/ast/src/lib.rs @@ -20,6 +20,14 @@ pub use error::DisplayError; pub use error::Error; pub use error::ErrorKind; +mod visitors; +pub use visitors::walk_expr; +pub use visitors::walk_expr_mut; +pub use visitors::walk_query; +pub use visitors::walk_query_mut; +pub use visitors::Visitor; +pub use visitors::VisitorMut; + mod input; pub use input::Input; diff --git a/common/ast/src/parser/query.rs b/common/ast/src/parser/query.rs index 43403996f63ea..9884fbcedfae8 100644 --- a/common/ast/src/parser/query.rs +++ b/common/ast/src/parser/query.rs @@ -62,7 +62,7 @@ pub fn with(i: Input) -> IResult { consumed(rule! { #table_alias ~ AS ~ "(" ~ #query ~ ")" }), - |(span, (table_alias, _, _, query, _))| Cte { + |(span, (table_alias, _, _, query, _))| CTE { span: span.0, alias: table_alias, query, diff --git a/common/ast/src/visitors/mod.rs b/common/ast/src/visitors/mod.rs new file mode 100644 index 0000000000000..6e92694ff8a0d --- /dev/null +++ b/common/ast/src/visitors/mod.rs @@ -0,0 +1,23 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod visitor; +mod visitor_mut; +mod walk; +mod walk_mut; + +pub use visitor::Visitor; +pub use visitor_mut::VisitorMut; +pub use walk::*; +pub use walk_mut::*; diff --git a/common/ast/src/visitors/visitor.rs b/common/ast/src/visitors/visitor.rs new file mode 100644 index 0000000000000..92e0f124eab03 --- /dev/null +++ b/common/ast/src/visitors/visitor.rs @@ -0,0 +1,500 @@ +// Copyright 2022 Datafuse Labs. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_datavalues::IntervalKind; +use common_meta_types::PrincipalIdentity; +use common_meta_types::UserIdentity; + +use super::walk::walk_cte; +use super::walk::walk_expr; +use super::walk::walk_identifier; +use super::walk::walk_join_condition; +use super::walk::walk_query; +use super::walk::walk_select_target; +use super::walk::walk_set_expr; +use super::walk::walk_table_reference; +use super::walk_time_travel_point; +use crate::ast::*; +use crate::parser::token::Token; + +pub trait Visitor<'ast>: Sized { + fn visit_expr(&mut self, expr: &'ast Expr<'ast>) { + walk_expr(self, expr); + } + + fn visit_identifier(&mut self, _ident: &'ast Identifier<'ast>) {} + + fn visit_column_ref( + &mut self, + _span: &'ast [Token<'ast>], + database: &'ast Option>, + table: &'ast Option>, + column: &'ast Identifier<'ast>, + ) { + if let Some(database) = database { + walk_identifier(self, database); + } + + if let Some(table) = table { + walk_identifier(self, table); + } + + walk_identifier(self, column); + } + + fn visit_is_null(&mut self, _span: &'ast [Token<'ast>], expr: &'ast Expr<'ast>, _not: bool) { + walk_expr(self, expr); + } + + fn visit_is_distinct_from( + &mut self, + _span: &'ast [Token<'ast>], + left: &'ast Expr<'ast>, + right: &'ast Expr<'ast>, + _not: bool, + ) { + walk_expr(self, left); + walk_expr(self, right); + } + + fn visit_in_list( + &mut self, + _span: &'ast [Token<'ast>], + expr: &'ast Expr<'ast>, + list: &'ast [Expr<'ast>], + _not: bool, + ) { + walk_expr(self, expr); + for expr in list { + walk_expr(self, expr); + } + } + + fn visit_in_subquery( + &mut self, + _span: &'ast [Token<'ast>], + expr: &'ast Expr<'ast>, + subquery: &'ast Query<'ast>, + _not: bool, + ) { + walk_expr(self, expr); + walk_query(self, subquery); + } + + fn visit_between( + &mut self, + _span: &'ast [Token<'ast>], + _expr: &'ast Expr<'ast>, + _low: &'ast Expr<'ast>, + _high: &'ast Expr<'ast>, + _not: bool, + ) { + } + + fn visit_binary_op( + &mut self, + _span: &'ast [Token<'ast>], + _op: &'ast BinaryOperator, + _left: &'ast Expr<'ast>, + _right: &'ast Expr<'ast>, + ) { + } + + fn visit_unary_op( + &mut self, + _span: &'ast [Token<'ast>], + _op: &'ast UnaryOperator, + _expr: &'ast Expr<'ast>, + ) { + } + + fn visit_cast( + &mut self, + _span: &'ast [Token<'ast>], + _expr: &'ast Expr<'ast>, + _target_type: &'ast TypeName, + _pg_style: bool, + ) { + } + + fn visit_try_cast( + &mut self, + _span: &'ast [Token<'ast>], + _expr: &'ast Expr<'ast>, + _target_type: &'ast TypeName, + ) { + } + + fn visit_extract( + &mut self, + _span: &'ast [Token<'ast>], + _kind: &'ast IntervalKind, + _expr: &'ast Expr<'ast>, + ) { + } + + fn visit_positon( + &mut self, + _span: &'ast [Token<'ast>], + _substr_expr: &'ast Expr<'ast>, + _str_expr: &'ast Expr<'ast>, + ) { + } + + fn visit_substring( + &mut self, + _span: &'ast [Token<'ast>], + _expr: &'ast Expr<'ast>, + _substring_from: &'ast Option>>, + _substring_for: &'ast Option>>, + ) { + } + + fn visit_trim( + &mut 
self, + _span: &'ast [Token<'ast>], + _expr: &'ast Expr<'ast>, + _trim_where: &'ast Option<(TrimWhere, Box>)>, + ) { + } + + fn visit_literal(&mut self, _span: &'ast [Token<'ast>], _lit: &'ast Literal) {} + + fn visit_count_all(&mut self, _span: &'ast [Token<'ast>]) {} + + fn visit_tuple(&mut self, _span: &'ast [Token<'ast>], _elements: &'ast [Expr<'ast>]) {} + + fn visit_function_call( + &mut self, + _span: &'ast [Token<'ast>], + _distinct: bool, + _name: &'ast Identifier<'ast>, + _args: &'ast [Expr<'ast>], + _params: &'ast [Literal], + ) { + } + + fn visit_case_when( + &mut self, + _span: &'ast [Token<'ast>], + _operand: &'ast Option>>, + _conditions: &'ast [Expr<'ast>], + _results: &'ast [Expr<'ast>], + _else_result: &'ast Option>>, + ) { + } + + fn visit_exists( + &mut self, + _span: &'ast [Token<'ast>], + _not: bool, + _subquery: &'ast Query<'ast>, + ) { + } + + fn visit_subquery( + &mut self, + _span: &'ast [Token<'ast>], + _modifier: &'ast Option, + _subquery: &'ast Query<'ast>, + ) { + } + + fn visit_map_access( + &mut self, + _span: &'ast [Token<'ast>], + expr: &'ast Expr<'ast>, + _accessor: &'ast MapAccessor<'ast>, + ) { + walk_expr(self, expr); + } + + fn visit_array(&mut self, _span: &'ast [Token<'ast>], _exprs: &'ast [Expr<'ast>]) {} + + fn visit_interval( + &mut self, + _span: &'ast [Token<'ast>], + _expr: &'ast Expr<'ast>, + _unit: &'ast IntervalKind, + ) { + } + + fn visit_date_add( + &mut self, + _span: &'ast [Token<'ast>], + _date: &'ast Expr<'ast>, + _interval: &'ast Expr<'ast>, + _unit: &'ast IntervalKind, + ) { + } + + fn visit_date_sub( + &mut self, + _span: &'ast [Token<'ast>], + _date: &'ast Expr<'ast>, + _interval: &'ast Expr<'ast>, + _unit: &'ast IntervalKind, + ) { + } + + fn visit_date_trunc( + &mut self, + _span: &'ast [Token<'ast>], + _unit: &'ast IntervalKind, + _date: &'ast Expr<'ast>, + ) { + } + + fn visit_nullif( + &mut self, + _span: &'ast [Token<'ast>], + _expr1: &'ast Expr<'ast>, + _expr2: &'ast Expr<'ast>, + ) { + } + + fn visit_coalesce(&mut self, _span: &'ast [Token<'ast>], _exprs: &'ast [Expr<'ast>]) {} + + fn visit_ifnull( + &mut self, + _span: &'ast [Token<'ast>], + _expr1: &'ast Expr<'ast>, + _expr2: &'ast Expr<'ast>, + ) { + } + + fn visit_statement(&mut self, _statement: &'ast Statement<'ast>) {} + + fn visit_query(&mut self, _query: &'ast Query<'ast>) {} + + fn visit_explain(&mut self, _kind: &'ast ExplainKind, _query: &'ast Statement<'ast>) {} + + fn visit_copy(&mut self, _copy: &'ast CopyStmt<'ast>) {} + + fn visit_call(&mut self, _call: &'ast CallStmt) {} + + fn visit_show_settings(&mut self, _like: &'ast Option) {} + + fn visit_show_process_list(&mut self) {} + + fn visit_show_metrics(&mut self) {} + + fn visit_show_engines(&mut self) {} + + fn visit_show_functions(&mut self, _limit: &'ast Option>) {} + + fn visit_kill(&mut self, _kill_target: &'ast KillTarget, _object_id: &'ast str) {} + + fn visit_set_variable( + &mut self, + _is_global: bool, + _variable: &'ast Identifier<'ast>, + _value: &'ast Literal, + ) { + } + + fn visit_insert(&mut self, _insert: &'ast InsertStmt<'ast>) {} + + fn visit_delete( + &mut self, + _table_reference: &'ast TableReference<'ast>, + _selection: &'ast Option>, + ) { + } + + fn visit_show_databases(&mut self, _stmt: &'ast ShowDatabasesStmt<'ast>) {} + + fn visit_show_create_databases(&mut self, _stmt: &'ast ShowCreateDatabaseStmt<'ast>) {} + + fn visit_create_database(&mut self, _stmt: &'ast CreateDatabaseStmt<'ast>) {} + + fn visit_drop_database(&mut self, _stmt: &'ast DropDatabaseStmt<'ast>) {} + + fn 
visit_undrop_database(&mut self, _stmt: &'ast UndropDatabaseStmt<'ast>) {} + + fn visit_alter_database(&mut self, _stmt: &'ast AlterDatabaseStmt<'ast>) {} + + fn visit_use_database(&mut self, _database: &'ast Identifier<'ast>) {} + + fn visit_show_tables(&mut self, _stmt: &'ast ShowTablesStmt<'ast>) {} + + fn visit_show_create_table(&mut self, _stmt: &'ast ShowCreateTableStmt<'ast>) {} + + fn visit_describe_table(&mut self, _stmt: &'ast DescribeTableStmt<'ast>) {} + + fn visit_show_tables_status(&mut self, _stmt: &'ast ShowTablesStatusStmt<'ast>) {} + + fn visit_create_table(&mut self, _stmt: &'ast CreateTableStmt<'ast>) {} + + fn visit_drop_table(&mut self, _stmt: &'ast DropTableStmt<'ast>) {} + + fn visit_undrop_table(&mut self, _stmt: &'ast UndropTableStmt<'ast>) {} + + fn visit_alter_table(&mut self, _stmt: &'ast AlterTableStmt<'ast>) {} + + fn visit_rename_table(&mut self, _stmt: &'ast RenameTableStmt<'ast>) {} + + fn visit_truncate_table(&mut self, _stmt: &'ast TruncateTableStmt<'ast>) {} + + fn visit_optimize_table(&mut self, _stmt: &'ast OptimizeTableStmt<'ast>) {} + + fn visit_exists_table(&mut self, _stmt: &'ast ExistsTableStmt<'ast>) {} + + fn visit_create_view(&mut self, _stmt: &'ast CreateViewStmt<'ast>) {} + + fn visit_alter_view(&mut self, _stmt: &'ast AlterViewStmt<'ast>) {} + + fn visit_show_users(&mut self) {} + + fn visit_create_user(&mut self, _stmt: &'ast CreateUserStmt) {} + + fn visit_alter_user(&mut self, _stmt: &'ast AlterUserStmt) {} + + fn visit_drop_user(&mut self, _if_exists: bool, _user: &'ast UserIdentity) {} + + fn visit_show_roles(&mut self) {} + + fn visit_create_role(&mut self, _if_not_exists: bool, _role_name: &'ast str) {} + + fn visit_drop_role(&mut self, _if_exists: bool, _role_name: &'ast str) {} + + fn visit_grant(&mut self, _grant: &'ast GrantStmt) {} + + fn visit_show_grant(&mut self, _principal: &'ast Option) {} + + fn visit_revoke(&mut self, _revoke: &'ast RevokeStmt) {} + + fn visit_create_udf( + &mut self, + _if_not_exists: bool, + _udf_name: &'ast Identifier<'ast>, + _parameters: &'ast [Identifier<'ast>], + _definition: &'ast Expr<'ast>, + _description: &'ast Option, + ) { + } + + fn visit_drop_udf(&mut self, _if_exists: bool, _udf_name: &'ast Identifier<'ast>) {} + + fn visit_alter_udf( + &mut self, + _udf_name: &'ast Identifier<'ast>, + _parameters: &'ast [Identifier<'ast>], + _definition: &'ast Expr<'ast>, + _description: &'ast Option, + ) { + } + + fn visit_create_stage(&mut self, _stmt: &'ast CreateStageStmt) {} + + fn visit_show_stages(&mut self) {} + + fn visit_drop_stage(&mut self, _if_exists: bool, _stage_name: &'ast str) {} + + fn visit_describe_stage(&mut self, _stage_name: &'ast str) {} + + fn visit_remove_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {} + + fn visit_list_stage(&mut self, _location: &'ast str, _pattern: &'ast str) {} + + fn visit_presign(&mut self, _presign: &'ast PresignStmt) {} + + fn visit_create_share(&mut self, _stmt: &'ast CreateShareStmt<'ast>) {} + + fn visit_drop_share(&mut self, _stmt: &'ast DropShareStmt<'ast>) {} + + fn visit_grant_share_object(&mut self, _stmt: &'ast GrantShareObjectStmt<'ast>) {} + + fn visit_revoke_share_object(&mut self, _stmt: &'ast RevokeShareObjectStmt<'ast>) {} + + fn visit_with(&mut self, with: &'ast With<'ast>) { + let With { ctes, .. 
} = with; + for cte in ctes.iter() { + walk_cte(self, cte); + } + } + + fn visit_set_expr(&mut self, expr: &'ast SetExpr<'ast>) { + walk_set_expr(self, expr); + } + + fn visit_set_operation(&mut self, op: &'ast SetOperation<'ast>) { + let SetOperation { left, right, .. } = op; + + walk_set_expr(self, left); + walk_set_expr(self, right); + } + + fn visit_order_by(&mut self, order_by: &'ast OrderByExpr<'ast>) { + let OrderByExpr { expr, .. } = order_by; + walk_expr(self, expr); + } + + fn visit_select_stmt(&mut self, stmt: &'ast SelectStmt<'ast>) { + let SelectStmt { + select_list, + from, + selection, + group_by, + having, + .. + } = stmt; + + for target in select_list.iter() { + walk_select_target(self, target); + } + + for table_ref in from.iter() { + walk_table_reference(self, table_ref); + } + + if let Some(selection) = selection { + walk_expr(self, selection); + } + + for expr in group_by.iter() { + walk_expr(self, expr); + } + + if let Some(having) = having { + walk_expr(self, having); + } + } + + fn visit_select_target(&mut self, target: &'ast SelectTarget<'ast>) { + walk_select_target(self, target); + } + + fn visit_table_reference(&mut self, table: &'ast TableReference<'ast>) { + walk_table_reference(self, table); + } + + fn visit_time_travel_point(&mut self, time: &'ast TimeTravelPoint<'ast>) { + walk_time_travel_point(self, time); + } + + fn visit_join(&mut self, join: &'ast Join<'ast>) { + let Join { + left, + right, + condition, + .. + } = join; + + walk_table_reference(self, left); + walk_table_reference(self, right); + + walk_join_condition(self, condition); + } +} diff --git a/common/ast/src/visitors/visitor_mut.rs b/common/ast/src/visitors/visitor_mut.rs new file mode 100644 index 0000000000000..5da9a99cccc67 --- /dev/null +++ b/common/ast/src/visitors/visitor_mut.rs @@ -0,0 +1,562 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use common_datavalues::IntervalKind; +use common_meta_types::PrincipalIdentity; +use common_meta_types::UserIdentity; + +use super::walk_mut::walk_cte_mut; +use super::walk_mut::walk_expr_mut; +use super::walk_mut::walk_identifier_mut; +use super::walk_mut::walk_join_condition_mut; +use super::walk_mut::walk_query_mut; +use super::walk_mut::walk_select_target_mut; +use super::walk_mut::walk_set_expr_mut; +use super::walk_mut::walk_table_reference_mut; +use super::walk_time_travel_point_mut; +use crate::ast::*; +use crate::parser::token::Token; + +pub trait VisitorMut: Sized { + fn visit_expr(&mut self, expr: &mut Expr<'_>) { + walk_expr_mut(self, expr); + } + + fn visit_identifier(&mut self, _ident: &mut Identifier<'_>) {} + + fn visit_column_ref( + &mut self, + _span: &mut &[Token<'_>], + database: &mut Option>, + table: &mut Option>, + column: &mut Identifier<'_>, + ) { + if let Some(database) = database { + walk_identifier_mut(self, database); + } + + if let Some(table) = table { + walk_identifier_mut(self, table); + } + + walk_identifier_mut(self, column); + } + + fn visit_is_null(&mut self, _span: &mut &[Token<'_>], expr: &mut Expr<'_>, _not: bool) { + walk_expr_mut(self, expr); + } + + fn visit_is_distinct_from( + &mut self, + _span: &mut &[Token<'_>], + left: &mut Expr<'_>, + right: &mut Expr<'_>, + _not: bool, + ) { + walk_expr_mut(self, left); + walk_expr_mut(self, right); + } + + fn visit_in_list( + &mut self, + _span: &mut &[Token<'_>], + expr: &mut Expr<'_>, + list: &mut [Expr<'_>], + _not: bool, + ) { + walk_expr_mut(self, expr); + for expr in list { + walk_expr_mut(self, expr); + } + } + + fn visit_in_subquery( + &mut self, + _span: &mut &[Token<'_>], + expr: &mut Expr<'_>, + subquery: &mut Query<'_>, + _not: bool, + ) { + walk_expr_mut(self, expr); + walk_query_mut(self, subquery); + } + + fn visit_between( + &mut self, + _span: &mut &[Token<'_>], + expr: &mut Expr<'_>, + low: &mut Expr<'_>, + high: &mut Expr<'_>, + _not: bool, + ) { + walk_expr_mut(self, expr); + walk_expr_mut(self, low); + walk_expr_mut(self, high); + } + + fn visit_binary_op( + &mut self, + _span: &mut &[Token<'_>], + _op: &mut BinaryOperator, + left: &mut Expr<'_>, + right: &mut Expr<'_>, + ) { + walk_expr_mut(self, left); + walk_expr_mut(self, right); + } + + fn visit_unary_op( + &mut self, + _span: &mut &[Token<'_>], + _op: &mut UnaryOperator, + expr: &mut Expr<'_>, + ) { + walk_expr_mut(self, expr); + } + + fn visit_cast( + &mut self, + _span: &mut &[Token<'_>], + expr: &mut Expr<'_>, + _target_type: &mut TypeName, + _pg_style: bool, + ) { + walk_expr_mut(self, expr); + } + + fn visit_try_cast( + &mut self, + _span: &mut &[Token<'_>], + expr: &mut Expr<'_>, + _target_type: &mut TypeName, + ) { + walk_expr_mut(self, expr); + } + + fn visit_extract( + &mut self, + _span: &mut &[Token<'_>], + _kind: &mut IntervalKind, + expr: &mut Expr<'_>, + ) { + walk_expr_mut(self, expr); + } + + fn visit_positon( + &mut self, + _span: &mut &[Token<'_>], + substr_expr: &mut Expr<'_>, + str_expr: &mut Expr<'_>, + ) { + walk_expr_mut(self, substr_expr); + walk_expr_mut(self, str_expr); + } + + fn visit_substring( + &mut self, + _span: &mut &[Token<'_>], + expr: &mut Expr<'_>, + substring_from: &mut Option>>, + substring_for: &mut Option>>, + ) { + walk_expr_mut(self, expr); + + if let Some(substring_from) = substring_from { + walk_expr_mut(self, substring_from); + } + + if let Some(substring_for) = substring_for { + walk_expr_mut(self, substring_for); + } + } + + fn visit_trim( + &mut self, + _span: &mut 
&[Token<'_>], + expr: &mut Expr<'_>, + trim_where: &mut Option<(TrimWhere, Box>)>, + ) { + walk_expr_mut(self, expr); + + if let Some((_, trim_where_expr)) = trim_where { + walk_expr_mut(self, trim_where_expr); + } + } + + fn visit_literal(&mut self, _span: &mut &[Token<'_>], _lit: &mut Literal) {} + + fn visit_count_all(&mut self, _span: &mut &[Token<'_>]) {} + + fn visit_tuple(&mut self, _span: &mut &[Token<'_>], elements: &mut [Expr<'_>]) { + for elem in elements.iter_mut() { + walk_expr_mut(self, elem); + } + } + + fn visit_function_call( + &mut self, + _span: &mut &[Token<'_>], + _distinct: bool, + _name: &mut Identifier<'_>, + args: &mut [Expr<'_>], + _params: &mut [Literal], + ) { + for arg in args.iter_mut() { + walk_expr_mut(self, arg); + } + } + + fn visit_case_when( + &mut self, + _span: &mut &[Token<'_>], + operand: &mut Option>>, + conditions: &mut [Expr<'_>], + results: &mut [Expr<'_>], + else_result: &mut Option>>, + ) { + if let Some(operand) = operand { + walk_expr_mut(self, operand); + } + + for condition in conditions.iter_mut() { + walk_expr_mut(self, condition); + } + + for result in results.iter_mut() { + walk_expr_mut(self, result); + } + + if let Some(else_result) = else_result { + walk_expr_mut(self, else_result); + } + } + + fn visit_exists(&mut self, _span: &mut &[Token<'_>], _not: bool, subquery: &mut Query<'_>) { + walk_query_mut(self, subquery); + } + + fn visit_subquery( + &mut self, + _span: &mut &[Token<'_>], + _modifier: &mut Option, + subquery: &mut Query<'_>, + ) { + walk_query_mut(self, subquery); + } + + fn visit_map_access( + &mut self, + _span: &mut &[Token<'_>], + expr: &mut Expr<'_>, + _accessor: &mut MapAccessor<'_>, + ) { + walk_expr_mut(self, expr); + } + + fn visit_array(&mut self, _span: &mut &[Token<'_>], elements: &mut [Expr<'_>]) { + for elem in elements.iter_mut() { + walk_expr_mut(self, elem); + } + } + + fn visit_interval( + &mut self, + _span: &mut &[Token<'_>], + expr: &mut Expr<'_>, + _unit: &mut IntervalKind, + ) { + walk_expr_mut(self, expr); + } + + fn visit_date_add( + &mut self, + _span: &mut &[Token<'_>], + date: &mut Expr<'_>, + interval: &mut Expr<'_>, + _unit: &mut IntervalKind, + ) { + walk_expr_mut(self, date); + walk_expr_mut(self, interval); + } + + fn visit_date_sub( + &mut self, + _span: &mut &[Token<'_>], + date: &mut Expr<'_>, + interval: &mut Expr<'_>, + _unit: &mut IntervalKind, + ) { + walk_expr_mut(self, date); + walk_expr_mut(self, interval); + } + + fn visit_date_trunc( + &mut self, + _span: &mut &[Token<'_>], + _unit: &mut IntervalKind, + date: &mut Expr<'_>, + ) { + walk_expr_mut(self, date); + } + + fn visit_nullif( + &mut self, + _span: &mut &[Token<'_>], + expr1: &mut Expr<'_>, + expr2: &mut Expr<'_>, + ) { + walk_expr_mut(self, expr1); + walk_expr_mut(self, expr2); + } + + fn visit_coalesce(&mut self, _span: &mut &[Token<'_>], exprs: &mut [Expr<'_>]) { + for expr in exprs.iter_mut() { + walk_expr_mut(self, expr); + } + } + + fn visit_ifnull( + &mut self, + _span: &mut &[Token<'_>], + expr1: &mut Expr<'_>, + expr2: &mut Expr<'_>, + ) { + walk_expr_mut(self, expr1); + walk_expr_mut(self, expr2); + } + + fn visit_statement(&mut self, _statement: &mut Statement<'_>) {} + + fn visit_query(&mut self, _query: &mut Query<'_>) {} + + fn visit_explain(&mut self, _kind: &mut ExplainKind, _query: &mut Statement<'_>) {} + + fn visit_copy(&mut self, _copy: &mut CopyStmt<'_>) {} + + fn visit_call(&mut self, _call: &mut CallStmt) {} + + fn visit_show_settings(&mut self, _like: &mut Option) {} + + fn 
visit_show_process_list(&mut self) {} + + fn visit_show_metrics(&mut self) {} + + fn visit_show_engines(&mut self) {} + + fn visit_show_functions(&mut self, _limit: &mut Option>) {} + + fn visit_kill(&mut self, _kill_target: &mut KillTarget, _object_id: &mut String) {} + + fn visit_set_variable( + &mut self, + _is_global: bool, + _variable: &mut Identifier<'_>, + _value: &mut Literal, + ) { + } + + fn visit_insert(&mut self, _insert: &mut InsertStmt<'_>) {} + + fn visit_delete( + &mut self, + _table_reference: &mut TableReference<'_>, + _selection: &mut Option>, + ) { + } + + fn visit_show_databases(&mut self, _stmt: &mut ShowDatabasesStmt<'_>) {} + + fn visit_show_create_databases(&mut self, _stmt: &mut ShowCreateDatabaseStmt<'_>) {} + + fn visit_create_database(&mut self, _stmt: &mut CreateDatabaseStmt<'_>) {} + + fn visit_drop_database(&mut self, _stmt: &mut DropDatabaseStmt<'_>) {} + + fn visit_undrop_database(&mut self, _stmt: &mut UndropDatabaseStmt<'_>) {} + + fn visit_alter_database(&mut self, _stmt: &mut AlterDatabaseStmt<'_>) {} + + fn visit_use_database(&mut self, _database: &mut Identifier<'_>) {} + + fn visit_show_tables(&mut self, _stmt: &mut ShowTablesStmt<'_>) {} + + fn visit_show_create_table(&mut self, _stmt: &mut ShowCreateTableStmt<'_>) {} + + fn visit_describe_table(&mut self, _stmt: &mut DescribeTableStmt<'_>) {} + + fn visit_show_tables_status(&mut self, _stmt: &mut ShowTablesStatusStmt<'_>) {} + + fn visit_create_table(&mut self, _stmt: &mut CreateTableStmt<'_>) {} + + fn visit_drop_table(&mut self, _stmt: &mut DropTableStmt<'_>) {} + + fn visit_undrop_table(&mut self, _stmt: &mut UndropTableStmt<'_>) {} + + fn visit_alter_table(&mut self, _stmt: &mut AlterTableStmt<'_>) {} + + fn visit_rename_table(&mut self, _stmt: &mut RenameTableStmt<'_>) {} + + fn visit_truncate_table(&mut self, _stmt: &mut TruncateTableStmt<'_>) {} + + fn visit_optimize_table(&mut self, _stmt: &mut OptimizeTableStmt<'_>) {} + + fn visit_exists_table(&mut self, _stmt: &mut ExistsTableStmt<'_>) {} + + fn visit_create_view(&mut self, _stmt: &mut CreateViewStmt<'_>) {} + + fn visit_alter_view(&mut self, _stmt: &mut AlterViewStmt<'_>) {} + + fn visit_show_users(&mut self) {} + + fn visit_create_user(&mut self, _stmt: &mut CreateUserStmt) {} + + fn visit_alter_user(&mut self, _stmt: &mut AlterUserStmt) {} + + fn visit_drop_user(&mut self, _if_exists: bool, _user: &mut UserIdentity) {} + + fn visit_show_roles(&mut self) {} + + fn visit_create_role(&mut self, _if_not_exists: bool, _role_name: &mut String) {} + + fn visit_drop_role(&mut self, _if_exists: bool, _role_name: &mut String) {} + + fn visit_grant(&mut self, _grant: &mut GrantStmt) {} + + fn visit_show_grant(&mut self, _principal: &mut Option) {} + + fn visit_revoke(&mut self, _revoke: &mut RevokeStmt) {} + + fn visit_create_udf( + &mut self, + _if_not_exists: bool, + _udf_name: &mut Identifier<'_>, + _parameters: &mut [Identifier<'_>], + _definition: &mut Expr<'_>, + _description: &mut Option, + ) { + } + + fn visit_drop_udf(&mut self, _if_exists: bool, _udf_name: &mut Identifier<'_>) {} + + fn visit_alter_udf( + &mut self, + _udf_name: &mut Identifier<'_>, + _parameters: &mut [Identifier<'_>], + _definition: &mut Expr<'_>, + _description: &mut Option, + ) { + } + + fn visit_create_stage(&mut self, _stmt: &mut CreateStageStmt) {} + + fn visit_show_stages(&mut self) {} + + fn visit_drop_stage(&mut self, _if_exists: bool, _stage_name: &mut String) {} + + fn visit_describe_stage(&mut self, _stage_name: &mut String) {} + + fn 
visit_remove_stage(&mut self, _location: &mut String, _pattern: &mut String) {} + + fn visit_list_stage(&mut self, _location: &mut String, _pattern: &mut String) {} + + fn visit_presign(&mut self, _presign: &mut PresignStmt) {} + + fn visit_create_share(&mut self, _stmt: &mut CreateShareStmt<'_>) {} + + fn visit_drop_share(&mut self, _stmt: &mut DropShareStmt<'_>) {} + + fn visit_grant_share_object(&mut self, _stmt: &mut GrantShareObjectStmt<'_>) {} + + fn visit_revoke_share_object(&mut self, _stmt: &mut RevokeShareObjectStmt<'_>) {} + + fn visit_with(&mut self, with: &mut With<'_>) { + let With { ctes, .. } = with; + for cte in ctes.iter_mut() { + walk_cte_mut(self, cte); + } + } + + fn visit_set_expr(&mut self, expr: &mut SetExpr<'_>) { + walk_set_expr_mut(self, expr); + } + + fn visit_set_operation(&mut self, op: &mut SetOperation<'_>) { + let SetOperation { left, right, .. } = op; + + walk_set_expr_mut(self, left); + walk_set_expr_mut(self, right); + } + + fn visit_order_by(&mut self, order_by: &mut OrderByExpr<'_>) { + let OrderByExpr { expr, .. } = order_by; + walk_expr_mut(self, expr); + } + + fn visit_select_stmt(&mut self, stmt: &mut SelectStmt<'_>) { + let SelectStmt { + select_list, + from, + selection, + group_by, + having, + .. + } = stmt; + + for target in select_list.iter_mut() { + walk_select_target_mut(self, target); + } + + for table_ref in from.iter_mut() { + walk_table_reference_mut(self, table_ref); + } + + if let Some(selection) = selection { + walk_expr_mut(self, selection); + } + + for expr in group_by.iter_mut() { + walk_expr_mut(self, expr); + } + + if let Some(having) = having { + walk_expr_mut(self, having); + } + } + + fn visit_select_target(&mut self, target: &mut SelectTarget<'_>) { + walk_select_target_mut(self, target); + } + + fn visit_table_reference(&mut self, table: &mut TableReference<'_>) { + walk_table_reference_mut(self, table); + } + + fn visit_time_travel_point(&mut self, time: &mut TimeTravelPoint<'_>) { + walk_time_travel_point_mut(self, time); + } + + fn visit_join(&mut self, join: &mut Join<'_>) { + let Join { + left, + right, + condition, + .. + } = join; + + walk_table_reference_mut(self, left); + walk_table_reference_mut(self, right); + + walk_join_condition_mut(self, condition); + } +} diff --git a/common/ast/src/visitors/walk.rs b/common/ast/src/visitors/walk.rs new file mode 100644 index 0000000000000..44c0a9263e63d --- /dev/null +++ b/common/ast/src/visitors/walk.rs @@ -0,0 +1,288 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use super::Visitor; +use crate::ast::*; + +pub fn walk_expr<'a, V: Visitor<'a>>(visitor: &mut V, expr: &'a Expr<'a>) { + match expr { + Expr::ColumnRef { + span, + database, + table, + column, + } => visitor.visit_column_ref(span, database, table, column), + Expr::IsNull { span, expr, not } => visitor.visit_is_null(span, expr, *not), + Expr::IsDistinctFrom { + span, + left, + right, + not, + } => visitor.visit_is_distinct_from(span, left, right, *not), + Expr::InList { + span, + expr, + list, + not, + } => visitor.visit_in_list(span, expr, list, *not), + Expr::InSubquery { + span, + expr, + subquery, + not, + } => visitor.visit_in_subquery(span, expr, subquery, *not), + Expr::Between { + span, + expr, + low, + high, + not, + } => visitor.visit_between(span, expr, low, high, *not), + Expr::BinaryOp { + span, + op, + left, + right, + } => visitor.visit_binary_op(span, op, left, right), + Expr::UnaryOp { span, op, expr } => visitor.visit_unary_op(span, op, expr), + Expr::Cast { + span, + expr, + target_type, + pg_style, + } => visitor.visit_cast(span, expr, target_type, *pg_style), + Expr::TryCast { + span, + expr, + target_type, + } => visitor.visit_try_cast(span, expr, target_type), + Expr::Extract { span, kind, expr } => visitor.visit_extract(span, kind, expr), + Expr::Position { + span, + substr_expr, + str_expr, + } => visitor.visit_positon(span, substr_expr, str_expr), + Expr::Substring { + span, + expr, + substring_from, + substring_for, + } => visitor.visit_substring(span, expr, substring_from, substring_for), + Expr::Trim { + span, + expr, + trim_where, + } => visitor.visit_trim(span, expr, trim_where), + Expr::Literal { span, lit } => visitor.visit_literal(span, lit), + Expr::CountAll { span } => visitor.visit_count_all(span), + Expr::Tuple { span, exprs } => visitor.visit_tuple(span, exprs), + Expr::FunctionCall { + span, + distinct, + name, + args, + params, + } => visitor.visit_function_call(span, *distinct, name, args, params), + Expr::Case { + span, + operand, + conditions, + results, + else_result, + } => visitor.visit_case_when(span, operand, conditions, results, else_result), + Expr::Exists { + span, + not, + subquery, + } => visitor.visit_exists(span, *not, subquery), + Expr::Subquery { + span, + modifier, + subquery, + } => visitor.visit_subquery(span, modifier, subquery), + Expr::MapAccess { + span, + expr, + accessor, + } => visitor.visit_map_access(span, expr, accessor), + Expr::Array { span, exprs } => visitor.visit_array(span, exprs), + Expr::Interval { span, expr, unit } => visitor.visit_interval(span, expr, unit), + Expr::DateAdd { + span, + date, + interval, + unit, + } => visitor.visit_date_add(span, date, interval, unit), + Expr::DateSub { + span, + date, + interval, + unit, + } => visitor.visit_date_sub(span, date, interval, unit), + Expr::DateTrunc { span, unit, date } => visitor.visit_date_trunc(span, unit, date), + Expr::NullIf { span, expr1, expr2 } => visitor.visit_nullif(span, expr1, expr2), + Expr::Coalesce { span, exprs } => visitor.visit_coalesce(span, exprs), + Expr::IfNull { span, expr1, expr2 } => visitor.visit_ifnull(span, expr1, expr2), + } +} + +pub fn walk_identifier<'a, V: Visitor<'a>>(visitor: &mut V, ident: &'a Identifier<'a>) { + visitor.visit_identifier(ident); +} + +pub fn walk_query<'a, V: Visitor<'a>>(visitor: &mut V, query: &'a Query<'a>) { + let Query { + with, + body, + order_by, + limit, + offset, + .. 
+ } = query; + + if let Some(with) = with { + visitor.visit_with(with); + } + visitor.visit_set_expr(body); + for order_by in order_by { + visitor.visit_order_by(order_by); + } + for limit in limit { + visitor.visit_expr(limit); + } + if let Some(offset) = offset { + visitor.visit_expr(offset); + } +} + +pub fn walk_set_expr<'a, V: Visitor<'a>>(visitor: &mut V, set_expr: &'a SetExpr<'a>) { + match set_expr { + SetExpr::Select(select) => { + visitor.visit_select_stmt(select); + } + SetExpr::Query(query) => { + visitor.visit_query(query); + } + SetExpr::SetOperation(op) => { + visitor.visit_set_operation(op); + } + } +} + +pub fn walk_select_target<'a, V: Visitor<'a>>(visitor: &mut V, target: &'a SelectTarget<'a>) { + match target { + SelectTarget::AliasedExpr { expr, alias } => { + visitor.visit_expr(expr); + if let Some(alias) = alias { + visitor.visit_identifier(alias); + } + } + SelectTarget::QualifiedName(names) => { + for indirection in names { + match indirection { + Indirection::Identifier(ident) => { + visitor.visit_identifier(ident); + } + Indirection::Star => {} + } + } + } + } +} + +pub fn walk_table_reference<'a, V: Visitor<'a>>( + visitor: &mut V, + table_ref: &'a TableReference<'a>, +) { + match table_ref { + TableReference::Table { + catalog, + database, + table, + alias, + travel_point, + .. + } => { + if let Some(catalog) = catalog { + visitor.visit_identifier(catalog); + } + + if let Some(database) = database { + visitor.visit_identifier(database); + } + + visitor.visit_identifier(table); + + if let Some(alias) = alias { + visitor.visit_identifier(&alias.name); + } + + if let Some(travel_point) = travel_point { + visitor.visit_time_travel_point(travel_point); + } + } + TableReference::Subquery { + subquery, alias, .. + } => { + visitor.visit_query(subquery); + if let Some(alias) = alias { + visitor.visit_identifier(&alias.name); + } + } + TableReference::TableFunction { + name, + params, + alias, + .. + } => { + visitor.visit_identifier(name); + for param in params { + visitor.visit_expr(param); + } + if let Some(alias) = alias { + visitor.visit_identifier(&alias.name); + } + } + TableReference::Join { join, .. } => { + visitor.visit_join(join); + } + } +} + +pub fn walk_time_travel_point<'a, V: Visitor<'a>>(visitor: &mut V, time: &'a TimeTravelPoint<'a>) { + match time { + TimeTravelPoint::Snapshot(_) => {} + TimeTravelPoint::Timestamp(expr) => visitor.visit_expr(expr), + } +} + +pub fn walk_join_condition<'a, V: Visitor<'a>>(visitor: &mut V, join_cond: &'a JoinCondition<'a>) { + match join_cond { + JoinCondition::On(expr) => visitor.visit_expr(expr), + JoinCondition::Using(using) => { + for ident in using.iter() { + visitor.visit_identifier(ident); + } + } + JoinCondition::Natural => {} + JoinCondition::None => {} + } +} + +pub fn walk_cte<'a, V: Visitor<'a>>(visitor: &mut V, cte: &'a CTE<'a>) { + let CTE { alias, query, .. } = cte; + + visitor.visit_identifier(&alias.name); + visitor.visit_query(query); +} diff --git a/common/ast/src/visitors/walk_mut.rs b/common/ast/src/visitors/walk_mut.rs new file mode 100644 index 0000000000000..da83107bb8d33 --- /dev/null +++ b/common/ast/src/visitors/walk_mut.rs @@ -0,0 +1,294 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::visitor_mut::VisitorMut; +use crate::ast::*; + +pub fn walk_expr_mut<'a, V: VisitorMut>(visitor: &mut V, expr: &mut Expr<'a>) { + match expr { + Expr::ColumnRef { + span, + database, + table, + column, + } => visitor.visit_column_ref(span, database, table, column), + Expr::IsNull { span, expr, not } => visitor.visit_is_null(span, expr, *not), + Expr::IsDistinctFrom { + span, + left, + right, + not, + } => visitor.visit_is_distinct_from(span, left, right, *not), + Expr::InList { + span, + expr, + list, + not, + } => visitor.visit_in_list(span, expr, list, *not), + Expr::InSubquery { + span, + expr, + subquery, + not, + } => visitor.visit_in_subquery(span, expr, subquery, *not), + Expr::Between { + span, + expr, + low, + high, + not, + } => visitor.visit_between(span, expr, low, high, *not), + Expr::BinaryOp { + span, + op, + left, + right, + } => visitor.visit_binary_op(span, op, left, right), + Expr::UnaryOp { span, op, expr } => visitor.visit_unary_op(span, op, expr), + Expr::Cast { + span, + expr, + target_type, + pg_style, + } => visitor.visit_cast(span, expr, target_type, *pg_style), + Expr::TryCast { + span, + expr, + target_type, + } => visitor.visit_try_cast(span, expr, target_type), + Expr::Extract { span, kind, expr } => visitor.visit_extract(span, kind, expr), + Expr::Position { + span, + substr_expr, + str_expr, + } => visitor.visit_positon(span, substr_expr, str_expr), + Expr::Substring { + span, + expr, + substring_from, + substring_for, + } => visitor.visit_substring(span, expr, substring_from, substring_for), + Expr::Trim { + span, + expr, + trim_where, + } => visitor.visit_trim(span, expr, trim_where), + Expr::Literal { span, lit } => visitor.visit_literal(span, lit), + Expr::CountAll { span } => visitor.visit_count_all(span), + Expr::Tuple { span, exprs } => visitor.visit_tuple(span, exprs), + Expr::FunctionCall { + span, + distinct, + name, + args, + params, + } => visitor.visit_function_call(span, *distinct, name, args, params), + Expr::Case { + span, + operand, + conditions, + results, + else_result, + } => visitor.visit_case_when(span, operand, conditions, results, else_result), + Expr::Exists { + span, + not, + subquery, + } => visitor.visit_exists(span, *not, subquery), + Expr::Subquery { + span, + modifier, + subquery, + } => visitor.visit_subquery(span, modifier, subquery), + Expr::MapAccess { + span, + expr, + accessor, + } => visitor.visit_map_access(span, expr, accessor), + Expr::Array { span, exprs } => visitor.visit_array(span, exprs), + Expr::Interval { span, expr, unit } => visitor.visit_interval(span, expr, unit), + Expr::DateAdd { + span, + date, + interval, + unit, + } => visitor.visit_date_add(span, date, interval, unit), + Expr::DateSub { + span, + date, + interval, + unit, + } => visitor.visit_date_sub(span, date, interval, unit), + Expr::DateTrunc { span, unit, date } => visitor.visit_date_trunc(span, unit, date), + Expr::IfNull { span, expr1, expr2 } => visitor.visit_ifnull(span, expr1, expr2), + Expr::NullIf { span, expr1, expr2 } => visitor.visit_nullif(span, expr1, expr2), + Expr::Coalesce { span, exprs } => 
visitor.visit_coalesce(span, exprs), + } +} + +pub fn walk_identifier_mut<'a, V: VisitorMut>(visitor: &mut V, ident: &mut Identifier<'a>) { + visitor.visit_identifier(ident); +} + +pub fn walk_query_mut<'a, V: VisitorMut>(visitor: &mut V, query: &mut Query<'a>) { + let Query { + with, + body, + order_by, + limit, + offset, + .. + } = query; + + if let Some(with) = with { + visitor.visit_with(with); + } + visitor.visit_set_expr(body); + for order_by in order_by { + visitor.visit_order_by(order_by); + } + for limit in limit { + visitor.visit_expr(limit); + } + if let Some(offset) = offset { + visitor.visit_expr(offset); + } +} + +pub fn walk_set_expr_mut<'a, V: VisitorMut>(visitor: &mut V, set_expr: &mut SetExpr<'a>) { + match set_expr { + SetExpr::Select(select) => { + visitor.visit_select_stmt(select); + } + SetExpr::Query(query) => { + visitor.visit_query(query); + } + SetExpr::SetOperation(op) => { + visitor.visit_set_operation(op); + } + } +} + +pub fn walk_select_target_mut<'a, V: VisitorMut>(visitor: &mut V, target: &mut SelectTarget<'a>) { + match target { + SelectTarget::AliasedExpr { expr, alias } => { + visitor.visit_expr(expr); + if let Some(alias) = alias { + visitor.visit_identifier(alias); + } + } + SelectTarget::QualifiedName(names) => { + for indirection in names { + match indirection { + Indirection::Identifier(ident) => { + visitor.visit_identifier(ident); + } + Indirection::Star => {} + } + } + } + } +} + +pub fn walk_table_reference_mut<'a, V: VisitorMut>( + visitor: &mut V, + table_ref: &mut TableReference<'a>, +) { + match table_ref { + TableReference::Table { + catalog, + database, + table, + alias, + travel_point, + .. + } => { + if let Some(catalog) = catalog { + visitor.visit_identifier(catalog); + } + + if let Some(database) = database { + visitor.visit_identifier(database); + } + + visitor.visit_identifier(table); + + if let Some(alias) = alias { + visitor.visit_identifier(&mut alias.name); + } + + if let Some(travel_point) = travel_point { + visitor.visit_time_travel_point(travel_point); + } + } + TableReference::Subquery { + subquery, alias, .. + } => { + visitor.visit_query(subquery); + if let Some(alias) = alias { + visitor.visit_identifier(&mut alias.name); + } + } + TableReference::TableFunction { + name, + params, + alias, + .. + } => { + visitor.visit_identifier(name); + for param in params { + visitor.visit_expr(param); + } + if let Some(alias) = alias { + visitor.visit_identifier(&mut alias.name); + } + } + TableReference::Join { join, .. } => { + visitor.visit_join(join); + } + } +} + +pub fn walk_time_travel_point_mut<'a, V: VisitorMut>( + visitor: &mut V, + time: &mut TimeTravelPoint<'a>, +) { + match time { + TimeTravelPoint::Snapshot(_) => {} + TimeTravelPoint::Timestamp(expr) => visitor.visit_expr(expr), + } +} + +pub fn walk_join_condition_mut<'a, V: VisitorMut>( + visitor: &mut V, + join_cond: &mut JoinCondition<'a>, +) { + match join_cond { + JoinCondition::On(expr) => visitor.visit_expr(expr), + JoinCondition::Using(using) => { + for ident in using.iter_mut() { + visitor.visit_identifier(ident); + } + } + JoinCondition::Natural => {} + JoinCondition::None => {} + } +} + +pub fn walk_cte_mut<'a, V: VisitorMut>(visitor: &mut V, cte: &mut CTE<'a>) { + let CTE { alias, query, .. 
} = cte; + + visitor.visit_identifier(&mut alias.name); + visitor.visit_query(query); +} diff --git a/query/src/sql/planner/binder/ddl/table.rs b/query/src/sql/planner/binder/ddl/table.rs index 7ad45e9d791ac..5f6b46286622f 100644 --- a/query/src/sql/planner/binder/ddl/table.rs +++ b/query/src/sql/planner/binder/ddl/table.rs @@ -21,6 +21,7 @@ use common_ast::ast::OptimizeTableAction as AstOptimizeTableAction; use common_ast::ast::*; use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; +use common_ast::walk_expr_mut; use common_ast::Backtrace; use common_datavalues::DataField; use common_datavalues::DataSchemaRef; @@ -44,6 +45,7 @@ use crate::sql::optimizer::optimize; use crate::sql::optimizer::OptimizerConfig; use crate::sql::optimizer::OptimizerContext; use crate::sql::planner::semantic::normalize_identifier; +use crate::sql::planner::semantic::IdentifierNormalizer; use crate::sql::plans::create_table_v2::CreateTablePlanV2; use crate::sql::plans::Plan; use crate::sql::plans::RewriteKind; @@ -843,7 +845,14 @@ impl<'a> Binder { cluster_by ))); } - cluster_keys.push(format!("{:#}", cluster_by)); + let mut cluster_by = cluster_by.clone(); + walk_expr_mut( + &mut IdentifierNormalizer { + ctx: &self.name_resolution_ctx, + }, + &mut cluster_by, + ); + cluster_keys.push(format!("{:#}", &cluster_by)); } Ok(cluster_keys) diff --git a/query/src/sql/planner/mod.rs b/query/src/sql/planner/mod.rs index 3cafd7bb21b7f..4e874d0f8099b 100644 --- a/query/src/sql/planner/mod.rs +++ b/query/src/sql/planner/mod.rs @@ -41,6 +41,7 @@ pub use metadata::Metadata; pub use metadata::MetadataRef; pub use metadata::TableEntry; pub use semantic::normalize_identifier; +pub use semantic::IdentifierNormalizer; pub use semantic::NameResolutionContext; use self::plans::Plan; diff --git a/query/src/sql/planner/semantic/mod.rs b/query/src/sql/planner/semantic/mod.rs index 40f79e21850f0..2f25bce4369d8 100644 --- a/query/src/sql/planner/semantic/mod.rs +++ b/query/src/sql/planner/semantic/mod.rs @@ -18,5 +18,6 @@ mod type_check; pub use grouping_check::GroupingChecker; pub use name_resolution::normalize_identifier; +pub use name_resolution::IdentifierNormalizer; pub use name_resolution::NameResolutionContext; pub use type_check::TypeChecker; diff --git a/query/src/sql/planner/semantic/name_resolution.rs b/query/src/sql/planner/semantic/name_resolution.rs index d6805e58d1312..5985acca20f10 100644 --- a/query/src/sql/planner/semantic/name_resolution.rs +++ b/query/src/sql/planner/semantic/name_resolution.rs @@ -13,6 +13,7 @@ // limitations under the License. use common_ast::ast::Identifier; +use common_ast::VisitorMut; use common_settings::Settings; #[derive(Debug, Clone)] @@ -61,3 +62,14 @@ pub fn normalize_identifier<'a>( } } } + +pub struct IdentifierNormalizer<'a> { + pub ctx: &'a NameResolutionContext, +} + +impl<'a> VisitorMut for IdentifierNormalizer<'a> { + fn visit_identifier(&mut self, ident: &mut Identifier<'_>) { + let normalized_ident = normalize_identifier(ident, self.ctx); + *ident = normalized_ident; + } +} diff --git a/query/tests/it/sql/planner/semantic/name_resolution.rs b/query/tests/it/sql/planner/semantic/name_resolution.rs index 779ec3648e11c..d366e87407e53 100644 --- a/query/tests/it/sql/planner/semantic/name_resolution.rs +++ b/query/tests/it/sql/planner/semantic/name_resolution.rs @@ -13,9 +13,14 @@ // limitations under the License. 
use common_ast::ast::Identifier; +use common_ast::parser::parse_expr; use common_ast::parser::token::Token; use common_ast::parser::token::TokenKind; +use common_ast::parser::tokenize_sql; +use common_ast::walk_expr_mut; +use common_ast::Backtrace; use databend_query::sql::normalize_identifier; +use databend_query::sql::IdentifierNormalizer; use databend_query::sql::NameResolutionContext; #[test] @@ -114,3 +119,20 @@ fn test_normalize_identifier_unquoted_case_sensitive() { ); } } + +#[test] +fn test_normalize_identifiers_in_expr() { + let tokens = tokenize_sql("exists(select func(\"T\".A+1) as B)").unwrap(); + let backtrace = Backtrace::new(); + let mut expr = parse_expr(&tokens, &backtrace).unwrap(); + + let ctx = NameResolutionContext::default(); + let mut normalizer = IdentifierNormalizer { ctx: &ctx }; + + walk_expr_mut(&mut normalizer, &mut expr); + + assert_eq!( + format!("{:#}", expr), + "EXISTS (SELECT func(\"T\".a + 1) AS b)".to_string() + ); +} diff --git a/tests/logictest/suites/base/15_query/case_sensitivity/ddl.test b/tests/logictest/suites/base/15_query/case_sensitivity/ddl.test index 18b92540498da..288a5fb59873a 100644 --- a/tests/logictest/suites/base/15_query/case_sensitivity/ddl.test +++ b/tests/logictest/suites/base/15_query/case_sensitivity/ddl.test @@ -12,7 +12,7 @@ create table t("A" int); onlyif mysql statement ok -create table "T"(a int); +create table "T"(a int) cluster by (A+1); onlyif mysql statement ok From 3014feae1748d88d8546dbc4ff3b42716444f32c Mon Sep 17 00:00:00 2001 From: leiysky Date: Wed, 10 Aug 2022 12:22:06 +0800 Subject: [PATCH 34/59] fix ut --- common/ast/tests/it/testdata/query.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/common/ast/tests/it/testdata/query.txt b/common/ast/tests/it/testdata/query.txt index dd158ff3eaad9..0bc0d6c1c90da 100644 --- a/common/ast/tests/it/testdata/query.txt +++ b/common/ast/tests/it/testdata/query.txt @@ -853,7 +853,7 @@ Query { ], recursive: false, ctes: [ - Cte { + CTE { span: [ Ident(5..7), LParen(7..8), @@ -1094,7 +1094,7 @@ Query { ], recursive: false, ctes: [ - Cte { + CTE { span: [ Ident(5..7), AS(8..10), @@ -1388,7 +1388,7 @@ Query { ], recursive: false, ctes: [ - Cte { + CTE { span: [ Ident(5..7), LParen(7..8), @@ -1477,7 +1477,7 @@ Query { format: None, }, }, - Cte { + CTE { span: [ Ident(34..36), AS(37..39), @@ -1547,7 +1547,7 @@ Query { format: None, }, }, - Cte { + CTE { span: [ Ident(59..61), AS(62..64), @@ -1916,7 +1916,7 @@ Query { ], recursive: true, ctes: [ - Cte { + CTE { span: [ Ident(15..17), LParen(17..18), From 33795f2f1750e19b38add6558ead188ccbfe1be7 Mon Sep 17 00:00:00 2001 From: leiysky Date: Wed, 10 Aug 2022 12:37:01 +0800 Subject: [PATCH 35/59] implement default method for Visitor --- common/ast/src/visitors/visitor.rs | 128 +++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 35 deletions(-) diff --git a/common/ast/src/visitors/visitor.rs b/common/ast/src/visitors/visitor.rs index 92e0f124eab03..eb86f5260da69 100644 --- a/common/ast/src/visitors/visitor.rs +++ b/common/ast/src/visitors/visitor.rs @@ -95,120 +95,160 @@ pub trait Visitor<'ast>: Sized { fn visit_between( &mut self, _span: &'ast [Token<'ast>], - _expr: &'ast Expr<'ast>, - _low: &'ast Expr<'ast>, - _high: &'ast Expr<'ast>, + expr: &'ast Expr<'ast>, + low: &'ast Expr<'ast>, + high: &'ast Expr<'ast>, _not: bool, ) { + walk_expr(self, expr); + walk_expr(self, low); + walk_expr(self, high); } fn visit_binary_op( &mut self, _span: &'ast [Token<'ast>], _op: &'ast BinaryOperator, - _left: 
&'ast Expr<'ast>,
-        _right: &'ast Expr<'ast>,
+        left: &'ast Expr<'ast>,
+        right: &'ast Expr<'ast>,
     ) {
+        walk_expr(self, left);
+        walk_expr(self, right);
     }
 
     fn visit_unary_op(
         &mut self,
         _span: &'ast [Token<'ast>],
         _op: &'ast UnaryOperator,
-        _expr: &'ast Expr<'ast>,
+        expr: &'ast Expr<'ast>,
     ) {
+        walk_expr(self, expr);
     }
 
     fn visit_cast(
         &mut self,
         _span: &'ast [Token<'ast>],
-        _expr: &'ast Expr<'ast>,
+        expr: &'ast Expr<'ast>,
         _target_type: &'ast TypeName,
         _pg_style: bool,
     ) {
+        walk_expr(self, expr);
     }
 
     fn visit_try_cast(
         &mut self,
         _span: &'ast [Token<'ast>],
-        _expr: &'ast Expr<'ast>,
+        expr: &'ast Expr<'ast>,
         _target_type: &'ast TypeName,
     ) {
+        walk_expr(self, expr);
     }
 
     fn visit_extract(
         &mut self,
         _span: &'ast [Token<'ast>],
         _kind: &'ast IntervalKind,
-        _expr: &'ast Expr<'ast>,
+        expr: &'ast Expr<'ast>,
     ) {
+        walk_expr(self, expr);
     }
 
     fn visit_positon(
         &mut self,
         _span: &'ast [Token<'ast>],
-        _substr_expr: &'ast Expr<'ast>,
-        _str_expr: &'ast Expr<'ast>,
+        substr_expr: &'ast Expr<'ast>,
+        str_expr: &'ast Expr<'ast>,
     ) {
+        walk_expr(self, substr_expr);
+        walk_expr(self, str_expr);
     }
 
     fn visit_substring(
         &mut self,
         _span: &'ast [Token<'ast>],
-        _expr: &'ast Expr<'ast>,
-        _substring_from: &'ast Option<Box<Expr<'ast>>>,
-        _substring_for: &'ast Option<Box<Expr<'ast>>>,
+        expr: &'ast Expr<'ast>,
+        substring_from: &'ast Option<Box<Expr<'ast>>>,
+        substring_for: &'ast Option<Box<Expr<'ast>>>,
     ) {
+        walk_expr(self, expr);
+        if let Some(substring_from) = substring_from {
+            walk_expr(self, substring_from);
+        }
+        if let Some(substring_for) = substring_for {
+            walk_expr(self, substring_for);
+        }
     }
 
     fn visit_trim(
         &mut self,
         _span: &'ast [Token<'ast>],
-        _expr: &'ast Expr<'ast>,
+        expr: &'ast Expr<'ast>,
         _trim_where: &'ast Option<(TrimWhere, Box<Expr<'ast>>)>,
     ) {
+        walk_expr(self, expr);
     }
 
     fn visit_literal(&mut self, _span: &'ast [Token<'ast>], _lit: &'ast Literal) {}
 
     fn visit_count_all(&mut self, _span: &'ast [Token<'ast>]) {}
 
-    fn visit_tuple(&mut self, _span: &'ast [Token<'ast>], _elements: &'ast [Expr<'ast>]) {}
+    fn visit_tuple(&mut self, _span: &'ast [Token<'ast>], elements: &'ast [Expr<'ast>]) {
+        for element in elements {
+            walk_expr(self, element);
+        }
+    }
 
     fn visit_function_call(
         &mut self,
         _span: &'ast [Token<'ast>],
         _distinct: bool,
         _name: &'ast Identifier<'ast>,
-        _args: &'ast [Expr<'ast>],
+        args: &'ast [Expr<'ast>],
         _params: &'ast [Literal],
     ) {
+        for arg in args {
+            walk_expr(self, arg);
+        }
     }
 
     fn visit_case_when(
         &mut self,
         _span: &'ast [Token<'ast>],
-        _operand: &'ast Option<Box<Expr<'ast>>>,
-        _conditions: &'ast [Expr<'ast>],
-        _results: &'ast [Expr<'ast>],
-        _else_result: &'ast Option<Box<Expr<'ast>>>,
+        operand: &'ast Option<Box<Expr<'ast>>>,
+        conditions: &'ast [Expr<'ast>],
+        results: &'ast [Expr<'ast>],
+        else_result: &'ast Option<Box<Expr<'ast>>>,
     ) {
+        if let Some(operand) = operand {
+            walk_expr(self, operand);
+        }
+        for condition in conditions {
+            walk_expr(self, condition);
+        }
+        for result in results {
+            walk_expr(self, result);
+        }
+        if let Some(else_result) = else_result {
+            walk_expr(self, else_result);
+        }
     }
 
     fn visit_exists(
         &mut self,
         _span: &'ast [Token<'ast>],
         _not: bool,
-        _subquery: &'ast Query<'ast>,
+        subquery: &'ast Query<'ast>,
     ) {
+        walk_query(self, subquery);
     }
 
     fn visit_subquery(
         &mut self,
         _span: &'ast [Token<'ast>],
         _modifier: &'ast Option<SubqueryModifier>,
-        _subquery: &'ast Query<'ast>,
+        subquery: &'ast Query<'ast>,
     ) {
+        walk_query(self, subquery);
     }
 
     fn visit_map_access(
@@ -220,58 +260,76 @@ pub trait Visitor<'ast>: Sized {
         walk_expr(self, expr);
     }
 
-    fn visit_array(&mut self, _span: &'ast [Token<'ast>], _exprs: &'ast [Expr<'ast>]) {}
+    fn visit_array(&mut self, _span: &'ast [Token<'ast>], exprs:
&'ast [Expr<'ast>]) { + for expr in exprs { + walk_expr(self, expr); + } + } fn visit_interval( &mut self, _span: &'ast [Token<'ast>], - _expr: &'ast Expr<'ast>, + expr: &'ast Expr<'ast>, _unit: &'ast IntervalKind, ) { + walk_expr(self, expr); } fn visit_date_add( &mut self, _span: &'ast [Token<'ast>], - _date: &'ast Expr<'ast>, - _interval: &'ast Expr<'ast>, + date: &'ast Expr<'ast>, + interval: &'ast Expr<'ast>, _unit: &'ast IntervalKind, ) { + walk_expr(self, date); + walk_expr(self, interval); } fn visit_date_sub( &mut self, _span: &'ast [Token<'ast>], - _date: &'ast Expr<'ast>, - _interval: &'ast Expr<'ast>, + date: &'ast Expr<'ast>, + interval: &'ast Expr<'ast>, _unit: &'ast IntervalKind, ) { + walk_expr(self, date); + walk_expr(self, interval); } fn visit_date_trunc( &mut self, _span: &'ast [Token<'ast>], _unit: &'ast IntervalKind, - _date: &'ast Expr<'ast>, + date: &'ast Expr<'ast>, ) { + walk_expr(self, date); } fn visit_nullif( &mut self, _span: &'ast [Token<'ast>], - _expr1: &'ast Expr<'ast>, - _expr2: &'ast Expr<'ast>, + expr1: &'ast Expr<'ast>, + expr2: &'ast Expr<'ast>, ) { + walk_expr(self, expr1); + walk_expr(self, expr2); } - fn visit_coalesce(&mut self, _span: &'ast [Token<'ast>], _exprs: &'ast [Expr<'ast>]) {} + fn visit_coalesce(&mut self, _span: &'ast [Token<'ast>], exprs: &'ast [Expr<'ast>]) { + for expr in exprs { + walk_expr(self, expr); + } + } fn visit_ifnull( &mut self, _span: &'ast [Token<'ast>], - _expr1: &'ast Expr<'ast>, - _expr2: &'ast Expr<'ast>, + expr1: &'ast Expr<'ast>, + expr2: &'ast Expr<'ast>, ) { + walk_expr(self, expr1); + walk_expr(self, expr2); } fn visit_statement(&mut self, _statement: &'ast Statement<'ast>) {} From bb707e4cb695ad466955c43daf486afc947f5806 Mon Sep 17 00:00:00 2001 From: elijah Date: Wed, 10 Aug 2022 12:43:39 +0800 Subject: [PATCH 36/59] chore: rename datetime functions for date_trunc --- query/src/sql/planner/semantic/type_check.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/query/src/sql/planner/semantic/type_check.rs b/query/src/sql/planner/semantic/type_check.rs index a6a944427fa3c..f57b66bd7e782 100644 --- a/query/src/sql/planner/semantic/type_check.rs +++ b/query/src/sql/planner/semantic/type_check.rs @@ -1258,7 +1258,7 @@ impl<'a> TypeChecker<'a> { IntervalKind::Year => { self.resolve_function( span, - "toStartOfYear", + "to_start_of_year", &[date], Some(TimestampType::new_impl(0)), ) @@ -1267,7 +1267,7 @@ impl<'a> TypeChecker<'a> { IntervalKind::Month => { self.resolve_function( span, - "toStartOfMonth", + "to_start_of_month", &[date], Some(TimestampType::new_impl(0)), ) @@ -1276,7 +1276,7 @@ impl<'a> TypeChecker<'a> { IntervalKind::Day => { self.resolve_function( span, - "toStartOfDay", + "to_start_of_day", &[date], Some(TimestampType::new_impl(0)), ) @@ -1285,7 +1285,7 @@ impl<'a> TypeChecker<'a> { IntervalKind::Hour => { self.resolve_function( span, - "toStartOfHour", + "to_start_of_hour", &[date], Some(TimestampType::new_impl(0)), ) @@ -1294,7 +1294,7 @@ impl<'a> TypeChecker<'a> { IntervalKind::Minute => { self.resolve_function( span, - "toStartOfMinute", + "to_start_of_minute", &[date], Some(TimestampType::new_impl(0)), ) @@ -1303,7 +1303,7 @@ impl<'a> TypeChecker<'a> { IntervalKind::Second => { self.resolve_function( span, - "toStartOfSecond", + "to_start_of_second", &[date], Some(TimestampType::new_impl(0)), ) From 7a0caa91bd031fbe9953a6a8005a06e3fb9e2539 Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 13:43:35 +0800 Subject: [PATCH 37/59] fix clippy --- 
common/expression/src/chunk.rs | 3 +- common/expression/src/kernels/filter.rs | 6 +- common/expression/tests/it/expression.rs | 73 +++++++++++++++--------- 3 files changed, 50 insertions(+), 32 deletions(-) diff --git a/common/expression/src/chunk.rs b/common/expression/src/chunk.rs index 24cd1f35ba220..d18a4d6317c3c 100644 --- a/common/expression/src/chunk.rs +++ b/common/expression/src/chunk.rs @@ -15,6 +15,7 @@ use std::ops::Range; use crate::types::AnyType; +use crate::Domain; use crate::Value; /// Chunk is a lightweight container for a group of columns. @@ -72,7 +73,7 @@ impl Chunk { .map(|value| value.as_ref().domain()) .collect() } - + #[inline] pub fn memory_size(&self) -> usize { self.columns() diff --git a/common/expression/src/kernels/filter.rs b/common/expression/src/kernels/filter.rs index 5ba8fa39f5224..080b15adfda72 100644 --- a/common/expression/src/kernels/filter.rs +++ b/common/expression/src/kernels/filter.rs @@ -108,9 +108,9 @@ impl Column { return self.clone(); } - with_number_type!(SRC_TYPE, match self { - Column::SRC_TYPE(values) => { - Column::SRC_TYPE(Self::filter_primitive_types(values, filter)) + with_number_type!(|NUM_TYPE| match self { + Column::NUM_TYPE(values) => { + Column::NUM_TYPE(Self::filter_primitive_types(values, filter)) } Column::Null { .. } | Column::EmptyArray { .. } => self.slice(0..length), Column::Boolean(bm) => Self::filter_scalar_types::( diff --git a/common/expression/tests/it/expression.rs b/common/expression/tests/it/expression.rs index b6796def52462..697023a6ffdc6 100644 --- a/common/expression/tests/it/expression.rs +++ b/common/expression/tests/it/expression.rs @@ -31,17 +31,16 @@ use common_expression::BooleanDomain; use common_expression::Chunk; use common_expression::Column; use common_expression::ColumnBuilder; +use common_expression::ConstantFolder; use common_expression::Domain; -use common_expression::DomainCalculator; use common_expression::Evaluator; -use common_expression::FloatDomain; use common_expression::Function; use common_expression::FunctionContext; use common_expression::FunctionProperty; use common_expression::FunctionRegistry; use common_expression::FunctionSignature; -use common_expression::IntDomain; use common_expression::NullableDomain; +use common_expression::NumberDomain; use common_expression::RemoteExpr; use common_expression::Scalar; use common_expression::ScalarRef; @@ -51,12 +50,14 @@ use goldenfile::Mint; use crate::parser::parse_raw_expr; +// Deprecate: move tests to `common_function_v2` #[test] pub fn test_pass() { let mut mint = Mint::new("tests/it/testdata"); let mut file = mint.new_goldenfile("run-pass.txt").unwrap(); run_ast(&mut file, "true AND false", &[]); + run_ast(&mut file, "CAST(false AS BOOLEAN NULL)", &[]); run_ast(&mut file, "null AND false", &[]); run_ast(&mut file, "plus(a, 10)", &[( "a", @@ -622,9 +623,9 @@ fn builtin_functions() -> FunctionRegistry { "plus", FunctionProperty::default(), |lhs, rhs| { - Some(IntDomain { - min: lhs.min.checked_add(rhs.min).unwrap_or(i16::MAX as i64), - max: lhs.max.checked_add(rhs.max).unwrap_or(i16::MAX as i64), + Some(NumberDomain { + min: lhs.min.checked_add(rhs.min).unwrap_or(i16::MAX), + max: lhs.max.checked_add(rhs.max).unwrap_or(i16::MAX), }) }, |lhs, rhs| lhs + rhs, @@ -634,9 +635,9 @@ fn builtin_functions() -> FunctionRegistry { "minus", FunctionProperty::default(), |lhs, rhs| { - Some(IntDomain { - min: lhs.min.checked_sub(rhs.max).unwrap_or(i32::MAX as i64), - max: lhs.max.checked_sub(rhs.min).unwrap_or(i32::MAX as i64), + Some(NumberDomain { + 
min: lhs.min.checked_sub(rhs.max).unwrap_or(i32::MAX), + max: lhs.max.checked_sub(rhs.min).unwrap_or(i32::MAX), }) }, |lhs, rhs| lhs - rhs, @@ -660,7 +661,7 @@ fn builtin_functions() -> FunctionRegistry { "avg", FunctionProperty::default(), |lhs, rhs| { - Some(FloatDomain { + Some(NumberDomain { min: (lhs.min + rhs.min) / 2.0, max: (lhs.max + rhs.max) / 2.0, }) @@ -691,15 +692,15 @@ fn builtin_functions() -> FunctionRegistry { calc_domain: Box::new(|args_domain, _| { let min = args_domain .iter() - .map(|domain| domain.as_int().unwrap().min) + .map(|domain| domain.as_int16().unwrap().min) .min() .unwrap_or(0); let max = args_domain .iter() - .map(|domain| domain.as_int().unwrap().max) + .map(|domain| domain.as_int16().unwrap().max) .min() .unwrap_or(0); - Domain::Int(IntDomain { min, max }) + Some(Domain::Int16(NumberDomain { min, max })) }), eval: Box::new(|args, generics| { if args.is_empty() { @@ -731,7 +732,7 @@ fn builtin_functions() -> FunctionRegistry { registry.register_0_arg_core::( "create_array", FunctionProperty::default(), - || None, + || Some(()), |_| Ok(Value::Scalar(())), ); @@ -744,9 +745,9 @@ fn builtin_functions() -> FunctionRegistry { property: FunctionProperty::default(), }, calc_domain: Box::new(|args_domain, _| { - args_domain.iter().fold(Domain::Array(None), |acc, x| { + Some(args_domain.iter().fold(Domain::Array(None), |acc, x| { acc.merge(&Domain::Array(Some(Box::new(x.clone())))) - }) + })) }), eval: Box::new(|args, generics| { let len = args.iter().find_map(|arg| match arg { @@ -794,7 +795,7 @@ fn builtin_functions() -> FunctionRegistry { registry.register_passthrough_nullable_2_arg::>, NumberType, GenericType<0>,_, _>( "get", FunctionProperty::default(), - |item_domain, _| Some(item_domain.clone()), + |_, _| None, vectorize_with_writer_2_arg::>, NumberType, GenericType<0>>( |array, idx, output| { let item = array @@ -813,7 +814,7 @@ fn builtin_functions() -> FunctionRegistry { return_type: DataType::Tuple(args_type.to_vec()), property: FunctionProperty::default(), }, - calc_domain: Box::new(|args_domain, _| Domain::Tuple(args_domain.to_vec())), + calc_domain: Box::new(|args_domain, _| Some(Domain::Tuple(args_domain.to_vec()))), eval: Box::new(move |args, _generics| { let len = args.iter().find_map(|arg| match arg { ValueRef::Column(col) => Some(col.len()), @@ -861,7 +862,7 @@ fn builtin_functions() -> FunctionRegistry { property: FunctionProperty::default(), }, calc_domain: Box::new(move |args_domain, _| { - args_domain[0].as_tuple().unwrap()[idx].clone() + Some(args_domain[0].as_tuple().unwrap()[idx].clone()) }), eval: Box::new(move |args, _| match &args[0] { ValueRef::Scalar(ScalarRef::Tuple(fields)) => { @@ -900,10 +901,10 @@ fn builtin_functions() -> FunctionRegistry { let fields = value.as_tuple().unwrap(); Box::new(fields[idx].clone()) }); - Domain::Nullable(NullableDomain { + Some(Domain::Nullable(NullableDomain { has_null: *has_null, value, - }) + })) }), eval: Box::new(move |args, _| match &args[0] { ValueRef::Scalar(ScalarRef::Null) => Ok(Value::Scalar(Scalar::Null)), @@ -946,8 +947,8 @@ fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Column .map(|(_, _, col)| col.domain()) .collect::>(); - let domain_calculator = DomainCalculator::new(input_domains.clone()); - let output_domain = domain_calculator.calculate(&expr)?; + let constant_folder = ConstantFolder::new(&input_domains); + let (optimized_expr, output_domain) = constant_folder.fold(&expr); let num_rows = columns.iter().map(|col| col.2.len()).max().unwrap_or(0); let chunk 
= Chunk::new( @@ -963,26 +964,42 @@ fn run_ast(file: &mut impl Write, text: &str, columns: &[(&str, DataType, Column }); let evaluator = Evaluator { - input_columns: chunk, + input_columns: &chunk, context: FunctionContext::default(), }; - let result = evaluator.run(&expr)?; + let result = evaluator.run(&expr); + let optimized_result = evaluator.run(&optimized_expr); + match &result { + Ok(result) => assert!( + result + .as_ref() + .sematically_eq(&optimized_result.unwrap().as_ref()) + ), + Err(e) => assert_eq!(e, &optimized_result.unwrap_err()), + } ( raw_expr, expr, input_domains, output_ty, - output_domain, - result, + optimized_expr, + output_domain + .as_ref() + .map(ToString::to_string) + .unwrap_or_else(|| "Unknown".to_string()), + result?, ) }; match result { - Ok((raw_expr, expr, input_domains, output_ty, output_domain, result)) => { + Ok((raw_expr, expr, input_domains, output_ty, optimized_expr, output_domain, result)) => { writeln!(file, "ast : {text}").unwrap(); writeln!(file, "raw expr : {raw_expr}").unwrap(); writeln!(file, "checked expr : {expr}").unwrap(); + if optimized_expr != expr { + writeln!(file, "optimized expr : {optimized_expr}").unwrap(); + } match result { Value::Scalar(output_scalar) => { From 26e120ff2b9c21ae1e56f0f8685969767cf7334b Mon Sep 17 00:00:00 2001 From: elijah Date: Wed, 10 Aug 2022 15:34:39 +0800 Subject: [PATCH 38/59] chore: rename datetime functions for ut --- .../src/scalars/dates/number_function.rs | 2 +- .../functions/tests/it/scalars/dates/date.rs | 16 +- .../tests/it/scalars/dates/date_function.rs | 172 +++++++++--------- .../it/scalars/dates/interval_function.rs | 8 +- 4 files changed, 99 insertions(+), 99 deletions(-) diff --git a/common/functions/src/scalars/dates/number_function.rs b/common/functions/src/scalars/dates/number_function.rs index cdc257443fe3b..8a9c07c022b71 100644 --- a/common/functions/src/scalars/dates/number_function.rs +++ b/common/functions/src/scalars/dates/number_function.rs @@ -289,7 +289,7 @@ impl NumberOperator for ToSecond { fn factor_function(_input_type: DataTypeImpl) -> Option> { Some( RoundFunction::try_create( - "toStartOfMinute", + "to_start_of_minute", &[&TimestampType::new_impl(0)], Round::Minute, ) diff --git a/common/functions/tests/it/scalars/dates/date.rs b/common/functions/tests/it/scalars/dates/date.rs index 0c8ff3abe6c46..18d8026e82a7f 100644 --- a/common/functions/tests/it/scalars/dates/date.rs +++ b/common/functions/tests/it/scalars/dates/date.rs @@ -22,13 +22,13 @@ use crate::scalars::scalar_function_test::ScalarFunctionWithFieldTest; #[test] fn test_round_function() -> Result<()> { let ops = vec![ - "toStartOfSecond", - "toStartOfMinute", - "toStartOfTenMinutes", - "toStartOfFifteenMinutes", - "timeSlot", - "toStartOfHour", - "toStartOfDay", + "to_start_of_second", + "to_start_of_minute", + "to_start_of_ten_minutes", + "to_start_of_fifteen_minutes", + "time_slot", + "to_start_of_hour", + "to_start_of_day", ]; let rounds = vec![1, 60, 60 * 10, 60 * 15, 60 * 30, 60 * 60, 60 * 60 * 24]; @@ -62,5 +62,5 @@ fn test_to_start_of_function() -> Result<()> { error: "", }]; - test_scalar_functions_with_type("toStartOfQuarter", &test) + test_scalar_functions_with_type("to_start_of_quarter", &test) } diff --git a/common/functions/tests/it/scalars/dates/date_function.rs b/common/functions/tests/it/scalars/dates/date_function.rs index 1c9120d89a3af..0663b63e4a2fd 100644 --- a/common/functions/tests/it/scalars/dates/date_function.rs +++ b/common/functions/tests/it/scalars/dates/date_function.rs @@ -22,10 +22,10 
@@ use crate::scalars::scalar_function_test::test_scalar_functions_with_type; use crate::scalars::scalar_function_test::ScalarFunctionWithFieldTest; #[test] -fn test_toyyyymm_function() -> Result<()> { +fn test_to_yyyymm_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_toyyyymm_date16", + name: "test_to_yyyymm_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -34,7 +34,7 @@ fn test_toyyyymm_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymm_date32", + name: "test_to_yyyymm_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32, 1, 2, 3]), DataField::new("dummy_1", DateType::new_impl()), @@ -43,7 +43,7 @@ fn test_toyyyymm_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymm_datetime", + name: "test_to_yyyymm_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![0i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -52,7 +52,7 @@ fn test_toyyyymm_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymm_constant_date16", + name: "test_to_yyyymm_constant_date16", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -61,7 +61,7 @@ fn test_toyyyymm_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymm_constant_date32", + name: "test_to_yyyymm_constant_date32", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -70,7 +70,7 @@ fn test_toyyyymm_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymm_constant_datetime", + name: "test_to_yyyymm_constant_datetime", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i64]), 1)), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -80,14 +80,14 @@ fn test_toyyyymm_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toYYYYMM", &tests) + test_scalar_functions_with_type("to_yyyymm", &tests) } #[test] fn test_to_yyyymmdd_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_date16", + name: "test_to_yyyymmdd_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -96,7 +96,7 @@ fn test_to_yyyymmdd_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_date32", + name: "test_to_yyyymmdd_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -105,7 +105,7 @@ fn test_to_yyyymmdd_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_datetime", + name: "test_to_yyyymmdd_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1630833797000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -114,7 +114,7 @@ fn test_to_yyyymmdd_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_date16", + name: "test_to_yyyymmdd_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -123,7 +123,7 @@ fn test_to_yyyymmdd_function() -> Result<()> { error: "", }, 
ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_date32", + name: "test_to_yyyymmdd_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -132,7 +132,7 @@ fn test_to_yyyymmdd_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_datetime", + name: "test_to_yyyymmdd_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1630833797000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -141,7 +141,7 @@ fn test_to_yyyymmdd_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_constant_date16", + name: "test_to_yyyymmdd_constant_date16", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -150,7 +150,7 @@ fn test_to_yyyymmdd_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_constant_date32", + name: "test_to_yyyymmdd_constant_date32", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -159,7 +159,7 @@ fn test_to_yyyymmdd_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmdd_constant_datetime", + name: "test_to_yyyymmdd_constant_datetime", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1630833797000000i64]), @@ -172,14 +172,14 @@ fn test_to_yyyymmdd_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toYYYYMMDD", &tests) + test_scalar_functions_with_type("to_yyyymmdd", &tests) } #[test] -fn test_toyyyymmddhhmmss_function() -> Result<()> { +fn test_to_yyyymmddhhmmss_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_toyyyymmddhhmmss_date16", + name: "test_to_yyyymmddhhmmss_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -188,7 +188,7 @@ fn test_toyyyymmddhhmmss_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmddhhmmss_date32", + name: "test_to_yyyymmddhhmmss_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -197,7 +197,7 @@ fn test_toyyyymmddhhmmss_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmddhhmmss_datetime", + name: "test_to_yyyymmddhhmmss_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1630833797000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -206,7 +206,7 @@ fn test_toyyyymmddhhmmss_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmddhhmmss_date16_constant", + name: "test_to_yyyymmddhhmmss_date16_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -215,7 +215,7 @@ fn test_toyyyymmddhhmmss_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_toyyyymmddhhmmss_date32_constant", + name: "test_to_yyyymmddhhmmss_date32_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -224,7 +224,7 @@ fn test_toyyyymmddhhmmss_function() -> Result<()> { error: "", }, 
ScalarFunctionWithFieldTest { - name: "test_toyyyymmddhhmmss_datetime_constant", + name: "test_to_yyyymmddhhmmss_datetime_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1630833797000000i64]), @@ -237,14 +237,14 @@ fn test_toyyyymmddhhmmss_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toYYYYMMDDhhmmss", &tests) + test_scalar_functions_with_type("to_yyyymmddhhmmss", &tests) } #[test] -fn test_tomonth_function() -> Result<()> { +fn test_to_month_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_tomonth_date16", + name: "test_to_month_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -253,7 +253,7 @@ fn test_tomonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tomonth_date32", + name: "test_to_month_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -262,7 +262,7 @@ fn test_tomonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tomonth_datetime", + name: "test_to_month_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1633081817000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -271,7 +271,7 @@ fn test_tomonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tomonth_date16_constant", + name: "test_to_month_date16_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -280,7 +280,7 @@ fn test_tomonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tomonth_date32_constant", + name: "test_to_month_date32_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -289,7 +289,7 @@ fn test_tomonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tomonth_datetime_constant", + name: "test_to_month_datetime_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1633081817000000i64]), @@ -302,14 +302,14 @@ fn test_tomonth_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toMonth", &tests) + test_scalar_functions_with_type("to_month", &tests) } #[test] -fn test_todayofyear_function() -> Result<()> { +fn test_to_day_of_year_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_todayofyear_date16", + name: "test_to_day_of_year_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -318,7 +318,7 @@ fn test_todayofyear_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofyear_date32", + name: "test_to_day_of_year_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -327,7 +327,7 @@ fn test_todayofyear_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofyear_datetime", + name: "test_to_day_of_year_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1633173324000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -336,7 +336,7 @@ fn test_todayofyear_function() -> Result<()> 
{ error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofyear_date16_constant", + name: "test_to_day_of_year_date16_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -345,7 +345,7 @@ fn test_todayofyear_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofyear_date32_constant", + name: "test_to_day_of_year_date32_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -354,7 +354,7 @@ fn test_todayofyear_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofyear_datetime_constant", + name: "test_to_day_of_year_datetime_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1633173324000000i64]), @@ -367,14 +367,14 @@ fn test_todayofyear_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toDayOfYear", &tests) + test_scalar_functions_with_type("to_day_of_year", &tests) } #[test] fn test_todatefweek_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_todayofweek_date16", + name: "test_to_day_of_week_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -383,7 +383,7 @@ fn test_todatefweek_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofweek_date32", + name: "test_to_day_of_week_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -392,7 +392,7 @@ fn test_todatefweek_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofweek_datetime", + name: "test_to_day_of_week_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1633173324000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -401,7 +401,7 @@ fn test_todatefweek_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofweek_date16_constant", + name: "test_to_day_of_week_date16_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -410,7 +410,7 @@ fn test_todatefweek_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofweek_date32_constant", + name: "test_to_day_of_week_date32_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -419,7 +419,7 @@ fn test_todatefweek_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofweek_datetime_constant", + name: "test_to_day_of_week_datetime_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1633173324000000i64]), @@ -432,14 +432,14 @@ fn test_todatefweek_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toDayOfWeek", &tests) + test_scalar_functions_with_type("to_day_of_week", &tests) } #[test] -fn test_todayofmonth_function() -> Result<()> { +fn test_to_day_of_month_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_todayofmonth_date16", + name: "test_to_day_of_month_date16", columns: vec![ColumnWithField::new( 
Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -448,7 +448,7 @@ fn test_todayofmonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofmonth_date32", + name: "test_to_day_of_month_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -457,7 +457,7 @@ fn test_todayofmonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofmonth_datetime", + name: "test_to_day_of_month_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1633173324000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -466,7 +466,7 @@ fn test_todayofmonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofmonth_date16_constant", + name: "test_to_day_of_month_date16_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -475,7 +475,7 @@ fn test_todayofmonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofmonth_date32_constant", + name: "test_to_day_of_month_date32_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -484,7 +484,7 @@ fn test_todayofmonth_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_todayofmonth_datetime_constant", + name: "test_to_day_of_month_datetime_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1633173324000000i64]), @@ -497,14 +497,14 @@ fn test_todayofmonth_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toDayOfMonth", &tests) + test_scalar_functions_with_type("to_day_of_month", &tests) } #[test] -fn test_tohour_function() -> Result<()> { +fn test_to_hour_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_tohour_date16", + name: "test_to_hour_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -513,7 +513,7 @@ fn test_tohour_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tohour_date32", + name: "test_to_hour_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -522,7 +522,7 @@ fn test_tohour_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tohour_datetime", + name: "test_to_hour_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1634551542000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -531,7 +531,7 @@ fn test_tohour_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tohour_date16_constant", + name: "test_to_hour_date16_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -540,7 +540,7 @@ fn test_tohour_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tohour_date32_constant", + name: "test_to_hour_date32_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -549,7 +549,7 @@ fn test_tohour_function() -> 
Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tohour_datetime_constant", + name: "test_to_hour_datetime_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1634551542000000i64]), @@ -562,14 +562,14 @@ fn test_tohour_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toHour", &tests) + test_scalar_functions_with_type("to_hour", &tests) } #[test] -fn test_tominute_function() -> Result<()> { +fn test_to_minute_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_tominute_date16", + name: "test_to_minute_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -578,7 +578,7 @@ fn test_tominute_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tominute_date32", + name: "test_to_minute_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -587,7 +587,7 @@ fn test_tominute_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tominute_datetime", + name: "test_to_minute_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1634551542000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -596,7 +596,7 @@ fn test_tominute_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tominute_date16_constant", + name: "test_to_minute_date16_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -605,7 +605,7 @@ fn test_tominute_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tominute_date32_constant", + name: "test_to_minute_date32_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -614,7 +614,7 @@ fn test_tominute_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tominute_datetime_constant", + name: "test_to_minute_datetime_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1634551542000000i64]), @@ -627,14 +627,14 @@ fn test_tominute_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toMinute", &tests) + test_scalar_functions_with_type("to_minute", &tests) } #[test] -fn test_tosecond_function() -> Result<()> { +fn test_to_second_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_tosecond_date16", + name: "test_to_second_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -643,7 +643,7 @@ fn test_tosecond_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tosecond_date32", + name: "test_to_second_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![0i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -652,7 +652,7 @@ fn test_tosecond_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tosecond_datetime", + name: "test_to_second_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1634551542000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -661,7 +661,7 @@ fn test_tosecond_function() -> Result<()> { error: "", }, 
ScalarFunctionWithFieldTest { - name: "test_tosecond_date16_constant", + name: "test_to_second_date16_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -670,7 +670,7 @@ fn test_tosecond_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tosecond_date32_constant", + name: "test_to_second_date32_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new(Series::from_data(vec![0i32]), 1)), DataField::new("dummy_1", DateType::new_impl()), @@ -679,7 +679,7 @@ fn test_tosecond_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tosecond_datetime_constant", + name: "test_to_second_datetime_constant", columns: vec![ColumnWithField::new( Arc::new(ConstColumn::new( Series::from_data(vec![1634551542000000i64]), @@ -692,14 +692,14 @@ fn test_tosecond_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toSecond", &tests) + test_scalar_functions_with_type("to_second", &tests) } #[test] -fn test_tomonday_function() -> Result<()> { +fn test_to_monday_function() -> Result<()> { let tests = vec![ ScalarFunctionWithFieldTest { - name: "test_tomonday_date16", + name: "test_to_monday_date16", columns: vec![ColumnWithField::new( Series::from_data(vec![18919i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -708,7 +708,7 @@ fn test_tomonday_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tomonday_date32", + name: "test_to_monday_date32", columns: vec![ColumnWithField::new( Series::from_data(vec![18919i32]), DataField::new("dummy_1", DateType::new_impl()), @@ -717,7 +717,7 @@ fn test_tomonday_function() -> Result<()> { error: "", }, ScalarFunctionWithFieldTest { - name: "test_tomonday_datetime", + name: "test_to_monday_datetime", columns: vec![ColumnWithField::new( Series::from_data(vec![1634614318000000i64]), DataField::new("dummy_1", TimestampType::new_impl(0)), @@ -727,5 +727,5 @@ fn test_tomonday_function() -> Result<()> { }, ]; - test_scalar_functions_with_type("toMonday", &tests) + test_scalar_functions_with_type("to_monday", &tests) } diff --git a/common/functions/tests/it/scalars/dates/interval_function.rs b/common/functions/tests/it/scalars/dates/interval_function.rs index 34b747a350f06..e07d86f12a613 100644 --- a/common/functions/tests/it/scalars/dates/interval_function.rs +++ b/common/functions/tests/it/scalars/dates/interval_function.rs @@ -87,7 +87,7 @@ fn test_add_months() -> Result<()> { expects.reserve(10); for (field, arg) in fields.iter().zip(args.iter()) { let add_months = - AddMonthsFunction::try_create_func("addMonths", 1, &[&DateType::new_impl(), arg])?; + AddMonthsFunction::try_create_func("add_months", 1, &[&DateType::new_impl(), arg])?; let col = add_months.eval( FunctionContext::default(), &[column("date"), column(field)], @@ -115,7 +115,7 @@ fn test_add_months() -> Result<()> { let mut expects: Vec = Vec::new(); expects.reserve(10); for (field, arg) in fields.iter().zip(args.iter()) { - let add_months = AddMonthsFunction::try_create_func("addMonths", 1, &[ + let add_months = AddMonthsFunction::try_create_func("add_months", 1, &[ &TimestampType::new_impl(0), arg, ])?; @@ -204,7 +204,7 @@ fn test_add_subtract_seconds() -> Result<()> { let mut expects: Vec = Vec::new(); expects.reserve(10); for (field, arg) in fields.iter().zip(args.iter()) { - let add_seconds = AddTimesFunction::try_create_func("addSeconds", 1, &[ + let add_seconds = 
AddTimesFunction::try_create_func("add_seconds", 1, &[
             &TimestampType::new_impl(0),
             arg,
         ])?;
@@ -235,7 +235,7 @@ fn test_add_subtract_seconds() -> Result<()> {
     let mut expects: Vec = Vec::new();
     expects.reserve(10);
     for (field, arg) in fields.iter().zip(args.iter()) {
-        let add_seconds = AddTimesFunction::try_create_func("subtractSeconds", -1, &[
+        let add_seconds = AddTimesFunction::try_create_func("subtract_seconds", -1, &[
             &TimestampType::new_impl(0),
             arg,
         ])?;

From d81339c12c628050a342e90feed150bdb85e8eb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E7=82=8E=E6=B3=BC?=
Date: Wed, 10 Aug 2022 16:03:47 +0800
Subject: [PATCH 39/59] refactor(meta-api): merge TableIdGen, DatabaseIdGen and
 ShareIdGen into one id-generator key

---
 common/meta/api/src/id_generator.rs    | 130 +++++++++++++++++++++++++
 common/meta/api/src/lib.rs             |   6 +-
 common/meta/api/src/schema_api_impl.rs |   7 +-
 common/meta/api/src/schema_api_keys.rs |  36 +------
 common/meta/api/src/share_api_impl.rs  |   4 +-
 common/meta/api/src/share_api_keys.rs  |  17 +---
 6 files changed, 140 insertions(+), 60 deletions(-)
 create mode 100644 common/meta/api/src/id_generator.rs

diff --git a/common/meta/api/src/id_generator.rs b/common/meta/api/src/id_generator.rs
new file mode 100644
index 0000000000000..4ba951548e798
--- /dev/null
+++ b/common/meta/api/src/id_generator.rs
@@ -0,0 +1,130 @@
+// Copyright 2021 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::kv_api_key::check_segment;
+use crate::kv_api_key::check_segment_absent;
+use crate::kv_api_key::check_segment_present;
+use crate::schema_api_keys::ID_GEN_DATABASE;
+use crate::schema_api_keys::ID_GEN_TABLE;
+use crate::share_api_keys::ID_GEN_SHARE;
+use crate::KVApiKey;
+use crate::KVApiKeyError;
+
+pub(crate) const PREFIX_ID_GEN: &str = "__fd_id_gen";
+
+/// Key for resource id generator
+///
+/// This is a special key for an application to generate a unique id with KVApi.
+/// An id is generated by updating a record in KVApi and retrieving the seq number.
+/// A seq number increases monotonically in KVApi.
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct IdGenerator { + pub resource: String, +} + +impl IdGenerator { + /// Create a key for generating table id with KVApi + pub fn table_id() -> Self { + Self { + resource: ID_GEN_TABLE.to_string(), + } + } + + /// Create a key for generating database id with KVApi + pub fn database_id() -> Self { + Self { + resource: ID_GEN_DATABASE.to_string(), + } + } + + /// Create a key for generating share id with KVApi + pub fn share_id() -> Self { + Self { + resource: ID_GEN_SHARE.to_string(), + } + } +} + +impl KVApiKey for IdGenerator { + const PREFIX: &'static str = PREFIX_ID_GEN; + + fn to_key(&self) -> String { + format!("{}/{}", Self::PREFIX, self.resource) + } + + fn from_key(s: &str) -> Result { + let mut elts = s.split('/'); + + let prefix = check_segment_present(elts.next(), 0, s)?; + check_segment(prefix, 0, Self::PREFIX)?; + + let resource = check_segment_present(elts.next(), 1, s)?; + + check_segment_absent(elts.next(), 2, s)?; + + Ok(IdGenerator { + resource: resource.to_string(), + }) + } +} + +#[cfg(test)] +mod t { + use crate::id_generator::IdGenerator; + use crate::KVApiKey; + + #[test] + fn test_id_generator() -> anyhow::Result<()> { + // Table id generator + { + let g = IdGenerator::table_id(); + let k = g.to_key(); + assert_eq!("__fd_id_gen/table_id", k); + + let t2 = IdGenerator::from_key(&k)?; + assert_eq!(g, t2); + } + + // Database id generator + { + let g = IdGenerator::database_id(); + let k = g.to_key(); + assert_eq!("__fd_id_gen/database_id", k); + + let t2 = IdGenerator::from_key(&k)?; + assert_eq!(g, t2); + } + + // Share id generator + { + let g = IdGenerator::share_id(); + let k = g.to_key(); + assert_eq!("__fd_id_gen/share_id", k); + + let t2 = IdGenerator::from_key(&k)?; + assert_eq!(g, t2); + } + + Ok(()) + } + + #[test] + fn test_id_generator_from_key_error() -> anyhow::Result<()> { + assert!(IdGenerator::from_key("__fd_id_gen").is_err()); + assert!(IdGenerator::from_key("__fd_id_gen/foo/bar").is_err()); + + assert!(IdGenerator::from_key("__foo/table_id").is_err()); + Ok(()) + } +} diff --git a/common/meta/api/src/lib.rs b/common/meta/api/src/lib.rs index c9957e9c29ce9..29320da72b46e 100644 --- a/common/meta/api/src/lib.rs +++ b/common/meta/api/src/lib.rs @@ -16,6 +16,7 @@ extern crate common_meta_types; mod id; +mod id_generator; mod kv_api; mod kv_api_key; mod kv_api_test_suite; @@ -30,6 +31,7 @@ mod share_api_keys; mod share_api_test_suite; pub use id::Id; +pub(crate) use id_generator::IdGenerator; pub use kv_api::get_start_and_end_of_prefix; pub use kv_api::prefix_of_string; pub use kv_api::ApiBuilder; @@ -55,13 +57,9 @@ pub use kv_api_utils::txn_op_put; pub use kv_api_utils::TXN_MAX_RETRY_TIMES; pub use schema_api::SchemaApi; pub(crate) use schema_api_impl::get_db_or_err; -pub use schema_api_keys::DatabaseIdGen; -pub use schema_api_keys::TableIdGen; -pub(crate) use schema_api_keys::PREFIX_ID_GEN; pub use schema_api_test_suite::SchemaApiTestSuite; pub use share_api::ShareApi; pub(crate) use share_api_impl::get_share_account_meta_or_err; pub(crate) use share_api_impl::get_share_id_to_name_or_err; pub(crate) use share_api_impl::get_share_meta_by_id_or_err; -pub use share_api_keys::ShareIdGen; pub use share_api_test_suite::ShareApiTestSuite; diff --git a/common/meta/api/src/schema_api_impl.rs b/common/meta/api/src/schema_api_impl.rs index 1a35c72a8ff2c..0737b341c85ab 100644 --- a/common/meta/api/src/schema_api_impl.rs +++ b/common/meta/api/src/schema_api_impl.rs @@ -103,11 +103,10 @@ use crate::table_has_to_exist; 
use crate::txn_cond_seq; use crate::txn_op_del; use crate::txn_op_put; -use crate::DatabaseIdGen; +use crate::IdGenerator; use crate::KVApi; use crate::KVApiKey; use crate::SchemaApi; -use crate::TableIdGen; use crate::TXN_MAX_RETRY_TIMES; const DEFAULT_DATA_RETENTION_SECONDS: i64 = 24 * 60 * 60; @@ -173,7 +172,7 @@ impl SchemaApi for KV { // append db_id into _fd_db_id_list// // (db_id) -> (tenant,db_name) - let db_id = fetch_id(self, DatabaseIdGen {}).await?; + let db_id = fetch_id(self, IdGenerator::database_id()).await?; let id_key = DatabaseId { db_id }; let id_to_name_key = DatabaseIdToName { db_id }; @@ -784,7 +783,7 @@ impl SchemaApi for KV { // append table_id into _fd_table_id_list/db_id/table_name // (table_id) -> table_name - let table_id = fetch_id(self, TableIdGen {}).await?; + let table_id = fetch_id(self, IdGenerator::table_id()).await?; let tbid = TableId { table_id }; diff --git a/common/meta/api/src/schema_api_keys.rs b/common/meta/api/src/schema_api_keys.rs index 0d9c116c4090c..a1a9ed69b9b35 100644 --- a/common/meta/api/src/schema_api_keys.rs +++ b/common/meta/api/src/schema_api_keys.rs @@ -14,8 +14,6 @@ //! Defines structured keys used by SchemaApi -use std::fmt::Debug; - use common_meta_app::schema::CountTablesKey; use common_meta_app::schema::DBIdTableName; use common_meta_app::schema::DatabaseId; @@ -42,18 +40,12 @@ const PREFIX_DB_ID_LIST: &str = "__fd_db_id_list"; const PREFIX_TABLE: &str = "__fd_table"; const PREFIX_TABLE_BY_ID: &str = "__fd_table_by_id"; const PREFIX_TABLE_ID_LIST: &str = "__fd_table_id_list"; -pub(crate) const PREFIX_ID_GEN: &str = "__fd_id_gen"; const PREFIX_TABLE_COUNT: &str = "__fd_table_count"; const PREFIX_DATABASE_ID_TO_NAME: &str = "__fd_database_id_to_name"; const PREFIX_TABLE_ID_TO_NAME: &str = "__fd_table_id_to_name"; -/// Key for database id generator -#[derive(Debug, Clone)] -pub struct DatabaseIdGen {} - -/// Key for table id generator -#[derive(Debug, Clone)] -pub struct TableIdGen {} +pub(crate) const ID_GEN_TABLE: &str = "table_id"; +pub(crate) const ID_GEN_DATABASE: &str = "database_id"; /// __fd_database// -> impl KVApiKey for DatabaseNameIdent { @@ -279,30 +271,6 @@ impl KVApiKey for TableIdListKey { } } -impl KVApiKey for DatabaseIdGen { - const PREFIX: &'static str = PREFIX_ID_GEN; - - fn to_key(&self) -> String { - format!("{}/database_id", Self::PREFIX) - } - - fn from_key(_s: &str) -> Result { - unimplemented!() - } -} - -impl KVApiKey for TableIdGen { - const PREFIX: &'static str = PREFIX_ID_GEN; - - fn to_key(&self) -> String { - format!("{}/table_id", Self::PREFIX) - } - - fn from_key(_s: &str) -> Result { - unimplemented!() - } -} - /// "__fd_table_count/" -> impl KVApiKey for CountTablesKey { const PREFIX: &'static str = PREFIX_TABLE_COUNT; diff --git a/common/meta/api/src/share_api_impl.rs b/common/meta/api/src/share_api_impl.rs index ffa58d88f8de0..52cb5e2524f2f 100644 --- a/common/meta/api/src/share_api_impl.rs +++ b/common/meta/api/src/share_api_impl.rs @@ -68,6 +68,7 @@ use crate::fetch_id; use crate::get_db_or_err; use crate::get_struct_value; use crate::get_u64_value; +use crate::id_generator::IdGenerator; use crate::send_txn; use crate::serialize_struct; use crate::serialize_u64; @@ -77,7 +78,6 @@ use crate::txn_op_del; use crate::txn_op_put; use crate::KVApi; use crate::ShareApi; -use crate::ShareIdGen; use crate::TXN_MAX_RETRY_TIMES; /// ShareApi is implemented upon KVApi. 
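The net effect of this patch is that database, table and share ids are all produced through the single `IdGenerator` key introduced above. A minimal call-site sketch, assuming an async context that returns a `Result` and a `KVApi` handle named `kv` (the variable name is hypothetical; `fetch_id` and the `IdGenerator` constructors are the ones appearing in this patch):

    // Every resource id now comes from the seq number of one "__fd_id_gen/<resource>" record.
    let db_id = fetch_id(kv, IdGenerator::database_id()).await?;
    let table_id = fetch_id(kv, IdGenerator::table_id()).await?;
    let share_id = fetch_id(kv, IdGenerator::share_id()).await?;

Carrying the resource name as data rather than as a type is what lets the per-resource key structs (`DatabaseIdGen`, `TableIdGen`, `ShareIdGen`) be deleted in the hunks that follow.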
@@ -154,7 +154,7 @@ impl ShareApi for KV { // (share_id) -> share_meta // (share) -> (tenant,share_name) - let share_id = fetch_id(self, ShareIdGen {}).await?; + let share_id = fetch_id(self, IdGenerator::share_id()).await?; let id_key = ShareId { share_id }; let id_to_name_key = ShareIdToName { share_id }; diff --git a/common/meta/api/src/share_api_keys.rs b/common/meta/api/src/share_api_keys.rs index f3ff535b877c8..4965abf95fd25 100644 --- a/common/meta/api/src/share_api_keys.rs +++ b/common/meta/api/src/share_api_keys.rs @@ -28,28 +28,13 @@ use kv_api_key::unescape; use crate::kv_api_key; use crate::KVApiKey; use crate::KVApiKeyError; -use crate::PREFIX_ID_GEN; const PREFIX_SHARE: &str = "__fd_share"; const PREFIX_SHARE_ID: &str = "__fd_share_id"; const PREFIX_SHARE_ID_TO_NAME: &str = "__fd_share_id_to_name"; const PREFIX_SHARE_ACCOUNT_ID: &str = "__fd_share_account_id"; -/// Key for share id generator -#[derive(Debug, Clone)] -pub struct ShareIdGen {} - -impl KVApiKey for ShareIdGen { - const PREFIX: &'static str = PREFIX_ID_GEN; - - fn to_key(&self) -> String { - format!("{}/share_id", Self::PREFIX) - } - - fn from_key(_s: &str) -> Result { - unimplemented!() - } -} +pub(crate) const ID_GEN_SHARE: &str = "share_id"; /// __fd_share// -> impl KVApiKey for ShareNameIdent { From b0e3e13a55fd17e4cf48c1af27cbc1bd88180f56 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 10 Aug 2022 20:16:46 +0800 Subject: [PATCH 40/59] feat(expression): add a helper trait ColumnFrom --- common/expression/src/lib.rs | 2 + common/expression/src/types.rs | 4 + common/expression/src/types/number.rs | 15 ++ common/expression/src/values.rs | 13 +- common/expression/tests/it/expression.rs | 254 +++++++---------------- common/expression/tests/it/kernel.rs | 91 +++----- 6 files changed, 136 insertions(+), 243 deletions(-) diff --git a/common/expression/src/lib.rs b/common/expression/src/lib.rs index d0ea64e5a02e5..4fd49e041a543 100755 --- a/common/expression/src/lib.rs +++ b/common/expression/src/lib.rs @@ -24,6 +24,7 @@ #[allow(dead_code)] mod chunk; +mod column_from; mod display; mod error; mod evaluator; @@ -37,6 +38,7 @@ pub mod util; mod values; pub use crate::chunk::*; +pub use crate::column_from::*; pub use crate::error::*; pub use crate::evaluator::*; pub use crate::expression::*; diff --git a/common/expression/src/types.rs b/common/expression/src/types.rs index b20cd2213820f..8e6836c4169c0 100755 --- a/common/expression/src/types.rs +++ b/common/expression/src/types.rs @@ -118,6 +118,10 @@ pub trait ArgType: ValueType { fn data_type() -> DataType; fn create_builder(capacity: usize, generics: &GenericMap) -> Self::ColumnBuilder; + fn column_from_vec(vec: Vec, generics: &GenericMap) -> Self::Column { + Self::column_from_iter(vec.iter().cloned(), generics) + } + fn column_from_iter( iter: impl Iterator, generics: &GenericMap, diff --git a/common/expression/src/types/number.rs b/common/expression/src/types/number.rs index e1342c75b6c91..0f36117dff4ae 100644 --- a/common/expression/src/types/number.rs +++ b/common/expression/src/types/number.rs @@ -51,6 +51,17 @@ pub trait Number: Debug + Clone + PartialEq + 'static { #[derive(Debug, Clone, PartialEq, Eq)] pub struct NumberType(PhantomData); +pub type Int8Type = NumberType; +pub type Int16Type = NumberType; +pub type Int32Type = NumberType; +pub type Int64Type = NumberType; +pub type UInt8Type = NumberType; +pub type UInt16Type = NumberType; +pub type UInt32Type = NumberType; +pub type UInt64Type = NumberType; +pub type Float32Type = 
NumberType; +pub type Float64Type = NumberType; + impl ValueType for NumberType { type Scalar = Num::Storage; type ScalarRef<'a> = Num::Storage; @@ -153,6 +164,10 @@ impl ArgType for NumberType { Vec::with_capacity(capacity) } + fn column_from_vec(vec: Vec, _generics: &GenericMap) -> Self::Column { + vec.into() + } + fn column_from_iter(iter: impl Iterator, _: &GenericMap) -> Self::Column { iter.collect() } diff --git a/common/expression/src/values.rs b/common/expression/src/values.rs index 8e63638ddf5b0..9b8f42f035831 100755 --- a/common/expression/src/values.rs +++ b/common/expression/src/values.rs @@ -17,6 +17,7 @@ use std::ops::Range; use common_arrow::arrow::bitmap::Bitmap; use common_arrow::arrow::bitmap::MutableBitmap; use common_arrow::arrow::buffer::Buffer; +use common_arrow::arrow::datatypes::DataType as ArrowType; use common_arrow::arrow::trusted_len::TrustedLen; use enum_as_inner::EnumAsInner; use itertools::Itertools; @@ -704,10 +705,14 @@ impl Column { None, )) } - Column::Nullable(col) => col - .column - .as_arrow() - .with_validity(Some(col.validity.clone())), + Column::Nullable(col) => { + let arrow_array = col.column.as_arrow(); + match arrow_array.data_type() { + ArrowType::Null => arrow_array, + ArrowType::Extension(_, t, _) if **t == ArrowType::Null => arrow_array, + _ => arrow_array.with_validity(Some(col.validity.clone())), + } + } Column::Tuple { fields, .. } => { Box::new(common_arrow::arrow::array::StructArray::from_data( self.arrow_type(), diff --git a/common/expression/tests/it/expression.rs b/common/expression/tests/it/expression.rs index 697023a6ffdc6..a1e07440e8343 100644 --- a/common/expression/tests/it/expression.rs +++ b/common/expression/tests/it/expression.rs @@ -21,7 +21,6 @@ use common_ast::DisplayError; use common_expression::type_check; use common_expression::types::array::ArrayColumn; use common_expression::types::nullable::NullableColumn; -use common_expression::types::string::StringColumn; use common_expression::types::ArrayType; use common_expression::types::DataType; use common_expression::types::*; @@ -31,6 +30,7 @@ use common_expression::BooleanDomain; use common_expression::Chunk; use common_expression::Column; use common_expression::ColumnBuilder; +use common_expression::ColumnFrom; use common_expression::ConstantFolder; use common_expression::Domain; use common_expression::Evaluator; @@ -62,37 +62,25 @@ pub fn test_pass() { run_ast(&mut file, "plus(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "plus(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1u8, 2, 3], vec![false, true, true]), ), ]); run_ast(&mut file, "plus(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - 
})), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -100,28 +88,19 @@ pub fn test_pass() { run_ast(&mut file, "minus(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "minus(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt16(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int16(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1i16, 2, 3], vec![false, true, true]), ), ]); @@ -129,10 +108,7 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -140,28 +116,19 @@ pub fn test_pass() { run_ast(&mut file, "multiply(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "multiply(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt16(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int16(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1i16, 2, 3], vec![false, true, true]), ), ]); @@ -169,18 +136,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt32(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u32, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int32(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1i32, 2, 3], vec![false, true, true]), ), ]); @@ -188,10 +149,7 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -199,28 +157,19 @@ pub fn test_pass() { run_ast(&mut file, "divide(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - 
Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "divide(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt16(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int16(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1i16, 2, 3], vec![false, true, true]), ), ]); @@ -228,10 +177,7 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -239,28 +185,19 @@ pub fn test_pass() { run_ast(&mut file, "avg(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "avg(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt16(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int16)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int16(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1i16, 2, 3], vec![false, true, true]), ), ]); @@ -268,18 +205,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt32(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10u32, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int32(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1i32, 2, 3], vec![false, true, true]), ), ]); @@ -287,18 +218,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::Float32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Float32(vec![10f32, 11f32, 12f32].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10f32, 11f32, 12f32], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Int32(vec![1, 2, 3].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1i32, 2, 3], vec![false, true, true]), ), ]); @@ -306,18 +231,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::Float32)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Float32(vec![10f32, 11f32, 
12f32].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10f32, 11f32, 12f32], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Float64)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Float64(vec![1f64, 2f64, 3f64].into()), - validity: vec![false, true, true].into(), - })), + Column::from_data_valids(vec![1f64, 2f64, 3f64], vec![false, true, true]), ), ]); @@ -325,10 +244,7 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::Int8)), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![10, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -336,10 +252,7 @@ pub fn test_pass() { run_ast(&mut file, "NOT a", &[( "a", DataType::Nullable(Box::new(DataType::Boolean)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Boolean(vec![true, false, true].into()), - validity: vec![false, true, false].into(), - })), + Column::from_data_valids(vec![true, false, true], vec![false, true, false]), )]); run_ast(&mut file, "NOT a", &[("a", DataType::Null, Column::Null { @@ -351,39 +264,28 @@ pub fn test_pass() { ( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), + Column::from_data(vec![0i16, 1, 2, 3, 4]), ), ( "b", DataType::Nullable(Box::new(DataType::String)), - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "abcde".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2, 3, 4, 5].into(), - }), - validity: vec![true, true, false, false, false].into(), - })), + Column::from_data_valids(vec!["a", "b", "c", "d", "e"], vec![ + true, true, false, false, false, + ]), ), ]); run_ast(&mut file, "get_tuple(1)(create_tuple(a, b))", &[ ( "a", DataType::Nullable(Box::new(DataType::Boolean)), - Column::Nullable(Box::new(NullableColumn { - column: Column::Boolean(vec![false; 5].into()), - validity: vec![true, true, false, false, false].into(), - })), + Column::from_data_valids(vec![false; 5], vec![true, true, false, false, false]), ), ( "b", DataType::Nullable(Box::new(DataType::String)), - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "abcde".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2, 3, 4, 5].into(), - }), - validity: vec![true, true, false, false, false].into(), - })), + Column::from_data_valids(vec!["a", "b", "c", "d", "e"], vec![ + true, true, false, false, false, + ]), ), ]); run_ast(&mut file, "create_array()", &[]); @@ -392,12 +294,12 @@ pub fn test_pass() { ( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), + Column::from_data(vec![0i16, 1, 2, 3, 4]), ), ( "b", DataType::Int16, - Column::Int16(vec![5, 6, 7, 8, 9].into()), + Column::from_data(vec![5i16, 6, 7, 8, 9]), ), ]); run_ast( @@ -407,12 +309,12 @@ pub fn test_pass() { ( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), + Column::from_data(vec![0i16, 1, 2, 3, 4]), ), ( "b", DataType::Int16, - Column::Int16(vec![5, 6, 7, 8, 9].into()), + Column::from_data(vec![5i16, 6, 7, 8, 9]), ), ], ); @@ -428,7 +330,7 @@ pub fn test_pass() { ( "b", DataType::UInt8, - Column::UInt8(vec![0, 1, 2, 3, 4].into()), + Column::from_data(vec![0u8, 1, 2, 3, 4]), ), ]); run_ast(&mut file, "get(a, b)", &[ @@ -450,23 +352,23 @@ pub fn test_pass() { ( "b", DataType::UInt8, - Column::UInt8(vec![0, 1, 2, 3, 4].into()), + Column::from_data(vec![0u8, 1, 2, 3, 
4]), ), ]); run_ast(&mut file, "TRY_CAST(a AS UINT8)", &[( "a", DataType::UInt16, - Column::UInt16(vec![0, 64, 255, 512, 1024].into()), + Column::from_data(vec![0u16, 64, 255, 512, 1024]), )]); run_ast(&mut file, "TRY_CAST(a AS UINT16)", &[( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, -4].into()), + Column::from_data(vec![0i16, 1, 2, 3, -4]), )]); run_ast(&mut file, "TRY_CAST(a AS INT64)", &[( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, -4].into()), + Column::from_data(vec![0i16, 1, 2, 3, -4]), )]); run_ast( &mut file, @@ -475,32 +377,26 @@ pub fn test_pass() { ( "a", DataType::UInt64, - Column::UInt64( - vec![ - 0, - 1, - u8::MAX as u64, - u16::MAX as u64, - u32::MAX as u64, - u64::MAX, - ] - .into(), - ), + Column::from_data(vec![ + 0, + 1, + u8::MAX as u64, + u16::MAX as u64, + u32::MAX as u64, + u64::MAX, + ]), ), ( "b", DataType::Float64, - Column::Float64( - vec![ - 0.0, - u32::MAX as f64, - u64::MAX as f64, - f64::MIN, - f64::MAX, - f64::INFINITY, - ] - .into(), - ), + Column::from_data(vec![ + 0.0, + u32::MAX as f64, + u64::MAX as f64, + f64::MIN, + f64::MAX, + f64::INFINITY, + ]), ), ], ); @@ -511,12 +407,12 @@ pub fn test_pass() { ( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 127, 255].into()), + Column::from_data(vec![0i16, 1, 2, 127, 255]), ), ( "b", DataType::Int16, - Column::Int16(vec![0, -1, -127, -128, -129].into()), + Column::from_data(vec![0i16, -1, -127, -128, -129]), ), ], ); @@ -527,12 +423,12 @@ pub fn test_pass() { ( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 127, 256].into()), + Column::from_data(vec![0i16, 1, 2, 127, 256]), ), ( "b", DataType::Int16, - Column::Int16(vec![0, 1, -127, -128, -129].into()), + Column::from_data(vec![0i16, 1, -127, -128, -129]), ), ], ); @@ -540,13 +436,13 @@ pub fn test_pass() { run_ast(&mut file, "CAST(a AS INT16)", &[( "a", DataType::Float64, - Column::Float64(vec![0.0f64, 1.1, 2.2, 3.3, -4.4].into()), + Column::from_data(vec![0.0f64, 1.1, 2.2, 3.3, -4.4]), )]); run_ast(&mut file, "CAST(b AS INT16)", &[( "b", DataType::Int8, - Column::Int8(vec![0, 1, 2, 3, -4].into()), + Column::from_data(vec![0i8, 1, 2, 3, -4]), )]); } @@ -560,7 +456,7 @@ pub fn test_tyck_fail() { run_ast(&mut file, "least(1, 2, 3, a)", &[( "a", DataType::Boolean, - Column::Boolean(vec![false; 3].into()), + Column::from_data(vec![false; 3]), )]); run_ast(&mut file, "create_array('a', 1)", &[]); run_ast(&mut file, "create_array('a', null, 'b', true)", &[]); @@ -578,29 +474,29 @@ pub fn test_eval_fail() { ( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), + Column::from_data(vec![0i16, 1, 2, 3, 4]), ), ( "b", DataType::Int16, - Column::Int16(vec![5, 6, 7, 8, 9].into()), + Column::from_data(vec![5i16, 6, 7, 8, 9]), ), ( "idx", DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, 4].into()), + Column::from_data(vec![0i16, 1, 2, 3, 4]), ), ]); run_ast(&mut file, "CAST(a AS UINT16)", &[( "a", DataType::Int16, - Column::Int16(vec![0, 1, 2, 3, -4].into()), + Column::from_data(vec![0i16, 1, 2, 3, -4]), )]); run_ast(&mut file, "CAST(c AS INT16)", &[( "c", DataType::Int64, - Column::Int64(vec![0, 11111111111, 2, 3, -4].into()), + Column::from_data(vec![0i64, 11111111111, 2, 3, -4]), )]); } diff --git a/common/expression/tests/it/kernel.rs b/common/expression/tests/it/kernel.rs index ce05fef827f28..daf2aa99358b2 100644 --- a/common/expression/tests/it/kernel.rs +++ b/common/expression/tests/it/kernel.rs @@ -18,6 +18,7 @@ use common_expression::types::nullable::NullableColumn; use 
common_expression::types::string::StringColumn; use common_expression::Chunk; use common_expression::Column; +use common_expression::ColumnFrom; use common_expression::Value; use goldenfile::Mint; @@ -31,83 +32,58 @@ pub fn test_pass() { Column::Boolean(vec![true, false, false, false, true].into()), &[ Column::Int32(vec![0, 1, 2, 3, -4].into()), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), - validity: vec![false, true, false, false, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12, 13, 14], vec![ + false, true, false, false, false, + ]), Column::Null { len: 5 }, - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "abcde".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2, 3, 4, 5].into(), - }), - validity: vec![true, true, false, false, false].into(), - })), + Column::from_data_valids(vec!["a", "b", "c", "d", "e"], vec![ + true, true, false, false, false, + ]), ], ); run_filter( &mut file, - Column::Nullable(Box::new(NullableColumn { - column: Column::Boolean(vec![true, true, false, true, true].into()), - validity: vec![false, true, true, false, false].into(), - })), + Column::from_data_valids(vec![true, true, false, true, true], vec![ + false, true, true, false, false, + ]), &[ Column::Int32(vec![0, 1, 2, 3, -4].into()), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), - validity: vec![false, true, false, false, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12, 13, 14], vec![ + false, true, false, false, false, + ]), Column::Null { len: 5 }, - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "xyzab".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2, 3, 4, 5].into(), - }), - validity: vec![false, true, true, false, false].into(), - })), + Column::from_data_valids(vec!["x", "y", "z", "a", "b"], vec![ + false, true, true, false, false, + ]), ], ); run_concat(&mut file, vec![ vec![ Column::Int32(vec![0, 1, 2, 3, -4].into()), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), - validity: vec![false, true, false, false, false].into(), - })), + Column::from_data_valids(vec![10u8, 11, 12, 13, 14], vec![ + false, true, false, false, false, + ]), Column::Null { len: 5 }, Column::EmptyArray { len: 5 }, - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "xyzab".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2, 3, 4, 5].into(), - }), - validity: vec![false, true, true, false, false].into(), - })), + Column::from_data_valids(vec!["x", "y", "z", "a", "b"], vec![ + false, true, true, false, false, + ]), ], vec![ Column::Int32(vec![5, 6].into()), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![15, 16].into()), - validity: vec![false, true].into(), - })), + Column::from_data_valids(vec![15u8, 16], vec![false, true]), Column::Null { len: 2 }, Column::EmptyArray { len: 2 }, - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "xy".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2].into(), - }), - validity: vec![false, true].into(), - })), + Column::from_data_valids(vec!["x", "y"], vec![false, true]), ], ]); run_take(&mut file, &[0, 3, 1], &[ Column::Int32(vec![0, 1, 2, 3, -4].into()), Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), + column: 
Column::UInt8(vec![10u8, 11, 12, 13, 14].into()), validity: vec![false, true, false, false, false].into(), })), Column::Null { len: 5 }, @@ -124,18 +100,13 @@ pub fn test_pass() { &mut file, &[ Column::Int32(vec![0, 1, 2, 3, -4].into()), - Column::Nullable(Box::new(NullableColumn { - column: Column::UInt8(vec![10, 11, 12, 13, 14].into()), - validity: vec![false, true, false, false, false].into(), - })), + Column::from_data_valids(vec![10, 11, 12, 13, 14], vec![ + false, true, false, false, false, + ]), Column::Null { len: 5 }, - Column::Nullable(Box::new(NullableColumn { - column: Column::String(StringColumn { - data: "xyzab".as_bytes().to_vec().into(), - offsets: vec![0, 1, 2, 3, 4, 5].into(), - }), - validity: vec![false, true, true, false, false].into(), - })), + Column::from_data_valids(vec!["x", "y", "z", "a", "b"], vec![ + false, true, true, false, false, + ]), ], &[0, 0, 1, 2, 1], 3, From cb85e82edc35c3438be02d6cf10ce561f95cd61e Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 10 Aug 2022 20:16:56 +0800 Subject: [PATCH 41/59] feat(expression): add a helper trait ColumnFrom --- common/expression/src/column_from.rs | 131 +++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100755 common/expression/src/column_from.rs diff --git a/common/expression/src/column_from.rs b/common/expression/src/column_from.rs new file mode 100755 index 0000000000000..6f4473398a978 --- /dev/null +++ b/common/expression/src/column_from.rs @@ -0,0 +1,131 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::iter::Iterator; + +use crate::types::nullable::NullableColumn; +use crate::types::number::*; +use crate::types::*; +use crate::Column; + +/// ColumnFrom is a helper trait to generate columns. +pub trait ColumnFrom { + /// Initialize by name and values. + fn from_data(_: D) -> Column; + + fn from_data_valids(d: D, valids: Vec) -> Column { + let column = Self::from_data(d); + Column::Nullable(Box::new(NullableColumn { + column, + validity: valids.into(), + })) + } +} + +macro_rules! for_common_scalar_values { + ($macro:tt $(, $x:tt)*) => { + $macro! { + [$($x),*], + { Int8Type }, + { Int16Type }, + { Int32Type }, + { Int64Type }, + { UInt8Type }, + { UInt16Type }, + { UInt32Type }, + { UInt64Type }, + { Float32Type }, + { Float64Type }, + { BooleanType }, + { StringType } + } + }; +} + +macro_rules! impl_from_iterator { + ([], $( { $T: ident} ),*) => { + $( + impl<'a, D: Iterator::ScalarRef<'a>>> + ColumnFrom::Scalar; 0]> for Column + { + fn from_data(d: D) -> Column { + $T::upcast_column($T::column_from_ref_iter(d.into_iter(), &[])) + } + } + )* + }; +} + + +macro_rules! impl_from_opt_iterator { + ([], $( { $T: ident} ),*) => { + $( + impl<'a, D: Iterator as ValueType>::ScalarRef<'a>>> + ColumnFrom as ValueType>::Scalar; 0]> for Column + { + fn from_data(d: D) -> Column { + NullableType::<$T>::upcast_column(NullableType::<$T>::column_from_ref_iter( + d.into_iter(), + &[], + )) + } + } + )* + }; +} + + +macro_rules! 
impl_from_vec { + ([], $( { $T: ident} ),*) => { + $( + impl ColumnFrom::Scalar>, [<$T as ValueType>::Scalar; 1]> for Column { + fn from_data(d: Vec<<$T as ValueType>::Scalar>) -> Column { + $T::upcast_column($T::column_from_vec(d, &[])) + } + } + )* + }; +} + + +macro_rules! impl_from_opt_vec { + ([], $( { $T: ident} ),*) => { + $( + impl + ColumnFrom< + Vec< as ValueType>::Scalar>, + [ as ValueType>::Scalar; 1], + > for Column + { + fn from_data(d: Vec< as ValueType>::Scalar>) -> Column { + NullableType::<$T>::upcast_column(NullableType::<$T>::column_from_vec(d, &[])) + } + } + )* + }; +} + +impl<'a, D: AsRef<[&'a str]>> ColumnFrom; 2]> for Column { + fn from_data(d: D) -> Column { + StringType::upcast_column(StringType::column_from_ref_iter( + d.as_ref().iter().map(|c| c.as_bytes()), + &[], + )) + } +} + +for_common_scalar_values! { impl_from_iterator } +for_common_scalar_values! { impl_from_opt_iterator } +for_common_scalar_values! { impl_from_vec } +for_common_scalar_values! { impl_from_opt_vec } From 57553a29a3303cc7967e6c0d7bc518f1717ebe67 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 10 Aug 2022 20:30:39 +0800 Subject: [PATCH 42/59] feat(expression): make lint happy --- common/expression/src/column_from.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/common/expression/src/column_from.rs b/common/expression/src/column_from.rs index 6f4473398a978..1b55a8f41cf32 100755 --- a/common/expression/src/column_from.rs +++ b/common/expression/src/column_from.rs @@ -67,7 +67,6 @@ macro_rules! impl_from_iterator { }; } - macro_rules! impl_from_opt_iterator { ([], $( { $T: ident} ),*) => { $( @@ -85,7 +84,6 @@ macro_rules! impl_from_opt_iterator { }; } - macro_rules! impl_from_vec { ([], $( { $T: ident} ),*) => { $( @@ -98,7 +96,6 @@ macro_rules! impl_from_vec { }; } - macro_rules! 
impl_from_opt_vec { ([], $( { $T: ident} ),*) => { $( From 260fa0bf0519650686aba448817a97edb403c078 Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 20:50:22 +0800 Subject: [PATCH 43/59] feat(parser): add mysql dialect --- common/ast/src/error.rs | 6 ++--- common/ast/src/input.rs | 24 ++++++++++++++++---- common/ast/src/lib.rs | 1 + common/ast/src/parser/expr.rs | 6 ++--- common/ast/src/parser/mod.rs | 10 +++++--- common/ast/src/util.rs | 14 ++++++------ common/ast/tests/it/parser.rs | 7 +++--- query/src/sql/planner/binder/copy.rs | 5 ++-- query/src/sql/planner/binder/ddl/table.rs | 3 ++- query/src/sql/planner/binder/insert.rs | 8 +++++-- query/src/sql/planner/binder/mod.rs | 3 ++- query/src/sql/planner/binder/table.rs | 3 ++- query/src/sql/planner/mod.rs | 3 ++- query/src/sql/planner/semantic/type_check.rs | 3 ++- 14 files changed, 64 insertions(+), 32 deletions(-) diff --git a/common/ast/src/error.rs b/common/ast/src/error.rs index 08057a7436c4a..595d8c192be29 100644 --- a/common/ast/src/error.rs +++ b/common/ast/src/error.rs @@ -88,7 +88,7 @@ impl<'a> nom::error::ParseError> for Error<'a> { span: i[0].clone(), errors: vec![], contexts: vec![], - backtrace: i.1, + backtrace: i.2, } } @@ -122,7 +122,7 @@ impl<'a> nom::error::ContextError> for Error<'a> { impl<'a> Error<'a> { pub fn from_error_kind(input: Input<'a>, kind: ErrorKind) -> Self { - let mut inner = input.1.inner.borrow_mut(); + let mut inner = input.2.inner.borrow_mut(); if let Some(ref mut inner) = *inner { match input.0[0].span.start.cmp(&inner.span.span.start) { Ordering::Equal => { @@ -147,7 +147,7 @@ impl<'a> Error<'a> { span: input.0[0].clone(), errors: vec![kind], contexts: vec![], - backtrace: input.1, + backtrace: input.2, } } } diff --git a/common/ast/src/input.rs b/common/ast/src/input.rs index b9ddb3867d692..c049060a5e232 100644 --- a/common/ast/src/input.rs +++ b/common/ast/src/input.rs @@ -23,7 +23,7 @@ use crate::Backtrace; /// Input tokens slice with a backtrace that records all errors including /// the optional branch. 
#[derive(Debug, Clone, Copy)] -pub struct Input<'a>(pub &'a [Token<'a>], pub &'a Backtrace<'a>); +pub struct Input<'a>(pub &'a [Token<'a>], pub Dialect, pub &'a Backtrace<'a>); impl<'a> std::ops::Deref for Input<'a> { type Target = [Token<'a>]; @@ -50,19 +50,19 @@ impl<'a> nom::Offset for Input<'a> { impl<'a> nom::Slice> for Input<'a> { fn slice(&self, range: Range) -> Self { - Input(&self.0[range], self.1) + Input(&self.0[range], self.1, self.2) } } impl<'a> nom::Slice> for Input<'a> { fn slice(&self, range: RangeTo) -> Self { - Input(&self.0[range], self.1) + Input(&self.0[range], self.1, self.2) } } impl<'a> nom::Slice> for Input<'a> { fn slice(&self, range: RangeFrom) -> Self { - Input(&self.0[range], self.1) + Input(&self.0[range], self.1, self.2) } } @@ -77,3 +77,19 @@ pub struct WithSpan<'a, T> { pub(crate) span: Input<'a>, pub(crate) elem: T, } + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum Dialect { + MySQL, + #[default] + PostgreSQL, +} + +impl Dialect { + pub fn ident_quote(&self) -> char { + match self { + Dialect::MySQL => '`', + Dialect::PostgreSQL => '"', + } + } +} diff --git a/common/ast/src/lib.rs b/common/ast/src/lib.rs index b558497921b15..2ee2cf072c73c 100644 --- a/common/ast/src/lib.rs +++ b/common/ast/src/lib.rs @@ -29,6 +29,7 @@ pub use visitors::Visitor; pub use visitors::VisitorMut; mod input; +pub use input::Dialect; pub use input::Input; mod util; diff --git a/common/ast/src/parser/expr.rs b/common/ast/src/parser/expr.rs index eb18d57a89474..cf42d16826e65 100644 --- a/common/ast/src/parser/expr.rs +++ b/common/ast/src/parser/expr.rs @@ -1080,13 +1080,13 @@ pub fn literal_string(i: Input) -> IResult { QuotedString }, |token| { - if token.text().starts_with('\'') { + if token.text().starts_with(i.1.ident_quote()) { + Err(ErrorKind::ExpectToken(QuotedString)) + } else { let str = &token.text()[1..token.text().len() - 1]; let unescaped = unescape(str, '\'').ok_or(ErrorKind::Other("invalid escape or unicode"))?; Ok(unescaped) - } else { - Err(ErrorKind::ExpectToken(QuotedString)) } }, )(i) diff --git a/common/ast/src/parser/mod.rs b/common/ast/src/parser/mod.rs index 2d76621403b73..51a1000086ee7 100644 --- a/common/ast/src/parser/mod.rs +++ b/common/ast/src/parser/mod.rs @@ -24,6 +24,7 @@ use common_exception::Result; use self::expr::subexpr; use crate::ast::Expr; use crate::ast::Statement; +use crate::input::Dialect; use crate::input::Input; use crate::parser::statement::statement; use crate::parser::token::Token; @@ -40,9 +41,10 @@ pub fn tokenize_sql(sql: &str) -> Result> { /// Parse a SQL string into `Statement`s. 
pub fn parse_sql<'a>( sql_tokens: &'a [Token<'a>], + dialect: Dialect, backtrace: &'a Backtrace<'a>, ) -> Result<(Statement<'a>, Option)> { - match statement(Input(sql_tokens, backtrace)) { + match statement(Input(sql_tokens, dialect, backtrace)) { Ok((rest, stmts)) if rest[0].kind == TokenKind::EOI => Ok((stmts.stmt, stmts.format)), Ok((rest, _)) => Err(ErrorCode::SyntaxException( rest[0].display_error("unable to parse rest of the sql".to_string()), @@ -57,9 +59,10 @@ pub fn parse_sql<'a>( /// Parse udf function into Expr pub fn parse_expr<'a>( sql_tokens: &'a [Token<'a>], + dialect: Dialect, backtrace: &'a Backtrace<'a>, ) -> Result> { - match expr::expr(Input(sql_tokens, backtrace)) { + match expr::expr(Input(sql_tokens, dialect, backtrace)) { Ok((rest, expr)) if rest[0].kind == TokenKind::EOI => Ok(expr), Ok((rest, _)) => Err(ErrorCode::SyntaxException( rest[0].display_error("unable to parse rest of the sql".to_string()), @@ -73,10 +76,11 @@ pub fn parse_expr<'a>( pub fn parse_comma_separated_exprs<'a>( sql_tokens: &'a [Token<'a>], + dialect: Dialect, backtrace: &'a Backtrace<'a>, ) -> Result>> { let mut comma_separated_exprs_parser = comma_separated_list0(subexpr(0)); - match comma_separated_exprs_parser(Input(sql_tokens, backtrace)) { + match comma_separated_exprs_parser(Input(sql_tokens, dialect, backtrace)) { Ok((_rest, exprs)) => Ok(exprs), Err(nom::Err::Error(err) | nom::Err::Failure(err)) => { Err(ErrorCode::SyntaxException(err.display_error(()))) diff --git a/common/ast/src/util.rs b/common/ast/src/util.rs index 635810d1fbd54..64f0536dc3928 100644 --- a/common/ast/src/util.rs +++ b/common/ast/src/util.rs @@ -92,17 +92,17 @@ fn non_reserved_identifier( ), move |i| { match_token(QuotedString)(i).and_then(|(i2, token)| { - if token.text().starts_with('\'') { - Err(nom::Err::Error(Error::from_error_kind( - i, - ErrorKind::ExpectToken(Ident), - ))) - } else { + if token.text().starts_with(i.1.ident_quote()) { Ok((i2, Identifier { span: token.clone(), name: token.text()[1..token.text().len() - 1].to_string(), quote: Some(token.text().chars().next().unwrap()), })) + } else { + Err(nom::Err::Error(Error::from_error_kind( + i, + ErrorKind::ExpectToken(Ident), + ))) } }) }, @@ -333,7 +333,7 @@ where .map_err(nom::Err::Error)?; if let Some(elem) = iter.peek() { // Rollback parsing footprint on unused expr elements. - input.1.clear(); + input.2.clear(); Ok((input.slice(input.offset(&elem.span)..), expr)) } else { Ok((rest, expr)) diff --git a/common/ast/tests/it/parser.rs b/common/ast/tests/it/parser.rs index 7b65165bb87de..bb936ce5143c3 100644 --- a/common/ast/tests/it/parser.rs +++ b/common/ast/tests/it/parser.rs @@ -21,6 +21,7 @@ use common_ast::parser::token::*; use common_ast::parser::tokenize_sql; use common_ast::rule; use common_ast::Backtrace; +use common_ast::Dialect; use common_ast::DisplayError; use common_ast::Input; use common_exception::Result; @@ -33,7 +34,7 @@ macro_rules! run_parser { let backtrace = Backtrace::new(); let parser = $parser; let mut parser = rule! 
{ #parser ~ &EOI }; - match parser.parse(Input(&tokens, &backtrace)) { + match parser.parse(Input(&tokens, Dialect::PostgreSQL, &backtrace)) { Ok((i, (output, _))) => { assert_eq!(i[0].kind, TokenKind::EOI); writeln!($file, "---------- Input ----------").unwrap(); @@ -269,7 +270,7 @@ fn test_statement() { for case in cases { let tokens = tokenize_sql(case).unwrap(); let backtrace = Backtrace::new(); - let (stmt, fmt) = parse_sql(&tokens, &backtrace).unwrap(); + let (stmt, fmt) = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace).unwrap(); writeln!(file, "---------- Input ----------").unwrap(); writeln!(file, "{}", case).unwrap(); writeln!(file, "---------- Output ---------").unwrap(); @@ -325,7 +326,7 @@ fn test_statement_error() { for case in cases { let tokens = tokenize_sql(case).unwrap(); let backtrace = Backtrace::new(); - let err = parse_sql(&tokens, &backtrace).unwrap_err(); + let err = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace).unwrap_err(); writeln!(file, "---------- Input ----------").unwrap(); writeln!(file, "{}", case).unwrap(); writeln!(file, "---------- Output ---------").unwrap(); diff --git a/query/src/sql/planner/binder/copy.rs b/query/src/sql/planner/binder/copy.rs index f88a7f4811194..9b0cdc971e557 100644 --- a/query/src/sql/planner/binder/copy.rs +++ b/query/src/sql/planner/binder/copy.rs @@ -21,6 +21,7 @@ use common_ast::ast::Statement; use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_exception::ErrorCode; use common_exception::Result; use common_meta_types::UserStageInfo; @@ -325,7 +326,7 @@ impl<'a> Binder { format!("SELECT * FROM {src_catalog_name}.{src_database_name}.{src_table_name}"); let tokens = tokenize_sql(&subquery)?; let backtrace = Backtrace::new(); - let sub_stmt_msg = parse_sql(&tokens, &backtrace)?; + let sub_stmt_msg = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?; let sub_stmt = sub_stmt_msg.0; let query = match &sub_stmt { Statement::Query(query) => { @@ -370,7 +371,7 @@ impl<'a> Binder { format!("SELECT * FROM {src_catalog_name}.{src_database_name}.{src_table_name}"); let tokens = tokenize_sql(&subquery)?; let backtrace = Backtrace::new(); - let sub_stmt_msg = parse_sql(&tokens, &backtrace)?; + let sub_stmt_msg = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?; let sub_stmt = sub_stmt_msg.0; let query = match &sub_stmt { Statement::Query(query) => { diff --git a/query/src/sql/planner/binder/ddl/table.rs b/query/src/sql/planner/binder/ddl/table.rs index 5f6b46286622f..c3cf53113a78e 100644 --- a/query/src/sql/planner/binder/ddl/table.rs +++ b/query/src/sql/planner/binder/ddl/table.rs @@ -23,6 +23,7 @@ use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; use common_ast::walk_expr_mut; use common_ast::Backtrace; +use common_ast::Dialect; use common_datavalues::DataField; use common_datavalues::DataSchemaRef; use common_datavalues::DataSchemaRefExt; @@ -304,7 +305,7 @@ impl<'a> Binder { }; let tokens = tokenize_sql(query.as_str())?; let backtrace = Backtrace::new(); - let (stmt, _) = parse_sql(&tokens, &backtrace)?; + let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?; self.bind_statement(bind_context, &stmt).await } diff --git a/query/src/sql/planner/binder/insert.rs b/query/src/sql/planner/binder/insert.rs index 06b6a5bd420b9..5d40642d4a06d 100644 --- a/query/src/sql/planner/binder/insert.rs +++ b/query/src/sql/planner/binder/insert.rs @@ -24,6 +24,7 @@ use common_ast::parser::parse_comma_separated_exprs; 
use common_ast::parser::token::Token; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_datablocks::DataBlock; use common_datavalues::prelude::*; use common_datavalues::DataSchemaRef; @@ -346,8 +347,11 @@ impl<'a> ValueSourceV2<'a> { let sql = std::str::from_utf8(buf).unwrap(); let tokens = tokenize_sql(sql)?; let backtrace = Backtrace::new(); - let exprs = - parse_comma_separated_exprs(&tokens[1..tokens.len() as usize], &backtrace)?; + let exprs = parse_comma_separated_exprs( + &tokens[1..tokens.len() as usize], + Dialect::PostgreSQL, + &backtrace, + )?; let values = exprs_to_datavalue( exprs, diff --git a/query/src/sql/planner/binder/mod.rs b/query/src/sql/planner/binder/mod.rs index a27b2b2da2fba..9a7c31072bba6 100644 --- a/query/src/sql/planner/binder/mod.rs +++ b/query/src/sql/planner/binder/mod.rs @@ -20,6 +20,7 @@ use common_ast::ast::Statement; use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_datavalues::DataTypeImpl; use common_exception::Result; use common_meta_types::UserDefinedFunction; @@ -323,7 +324,7 @@ impl<'a> Binder { ) -> Result { let tokens = tokenize_sql(query)?; let backtrace = Backtrace::new(); - let (stmt, _) = parse_sql(&tokens, &backtrace)?; + let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?; let mut plan = self.bind_statement(bind_context, &stmt).await?; if let Plan::Query { rewrite_kind, .. } = &mut plan { diff --git a/query/src/sql/planner/binder/table.rs b/query/src/sql/planner/binder/table.rs index 257ba3ee23502..b5174f10e2285 100644 --- a/query/src/sql/planner/binder/table.rs +++ b/query/src/sql/planner/binder/table.rs @@ -23,6 +23,7 @@ use common_ast::ast::TimeTravelPoint; use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_ast::DisplayError; use common_catalog::catalog::CATALOG_DEFAULT; use common_datavalues::prelude::*; @@ -132,7 +133,7 @@ impl<'a> Binder { .ok_or_else(|| ErrorCode::LogicalError("Invalid VIEW object"))?; let tokens = tokenize_sql(query.as_str())?; let backtrace = Backtrace::new(); - let (stmt, _) = parse_sql(&tokens, &backtrace)?; + let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?; if let Statement::Query(query) = &stmt { self.bind_query(bind_context, query).await } else { diff --git a/query/src/sql/planner/mod.rs b/query/src/sql/planner/mod.rs index 4e874d0f8099b..8c2ab0288117c 100644 --- a/query/src/sql/planner/mod.rs +++ b/query/src/sql/planner/mod.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_exception::Result; use parking_lot::RwLock; pub use plans::ScalarExpr; @@ -64,7 +65,7 @@ impl Planner { // Step 1: parse SQL text into AST let tokens = tokenize_sql(sql)?; let backtrace = Backtrace::new(); - let (stmt, format) = parse_sql(&tokens, &backtrace)?; + let (stmt, format) = parse_sql(&tokens, Dialect::PostgreSQL, &backtrace)?; // Step 2: bind AST with catalog, and generate a pure logical SExpr let metadata = Arc::new(RwLock::new(Metadata::create())); diff --git a/query/src/sql/planner/semantic/type_check.rs b/query/src/sql/planner/semantic/type_check.rs index 5b916a1ea7ea0..dee5e40573b97 100644 --- a/query/src/sql/planner/semantic/type_check.rs +++ b/query/src/sql/planner/semantic/type_check.rs @@ -29,6 +29,7 @@ use common_ast::parser::parse_expr; 
use common_ast::parser::token::Token; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_ast::DisplayError; use common_datavalues::type_coercion::merge_types; use common_datavalues::ArrayType; @@ -1586,7 +1587,7 @@ impl<'a> TypeChecker<'a> { } let backtrace = Backtrace::new(); let sql_tokens = tokenize_sql(udf.definition.as_str())?; - let expr = parse_expr(&sql_tokens, &backtrace)?; + let expr = parse_expr(&sql_tokens, Dialect::PostgreSQL, &backtrace)?; let mut args_map = HashMap::new(); arguments.iter().enumerate().for_each(|(idx, argument)| { if let Some(parameter) = parameters.get(idx) { From 992a6f1fb0edc5033cc2c4a8bf25122939508e72 Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 20:53:30 +0800 Subject: [PATCH 44/59] update style --- common/ast/src/input.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/ast/src/input.rs b/common/ast/src/input.rs index c049060a5e232..446de97bb6c40 100644 --- a/common/ast/src/input.rs +++ b/common/ast/src/input.rs @@ -80,9 +80,9 @@ pub struct WithSpan<'a, T> { #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum Dialect { - MySQL, #[default] PostgreSQL, + MySQL, } impl Dialect { From 49f1eaa1bb3c16d575111cae23cf78d60d0c94a9 Mon Sep 17 00:00:00 2001 From: leiysky Date: Wed, 10 Aug 2022 20:55:36 +0800 Subject: [PATCH 45/59] enable push down filter scan --- query/src/sql/optimizer/heuristic/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/query/src/sql/optimizer/heuristic/mod.rs b/query/src/sql/optimizer/heuristic/mod.rs index e2bf96c090935..f72a3229f2638 100644 --- a/query/src/sql/optimizer/heuristic/mod.rs +++ b/query/src/sql/optimizer/heuristic/mod.rs @@ -54,6 +54,7 @@ pub static DEFAULT_REWRITE_RULES: Lazy> = Lazy::new(|| { RuleID::PushDownFilterProject, RuleID::PushDownFilterJoin, RuleID::SplitAggregate, + RuleID::PushDownFilterScan, ] }); From 8b4a7199e18c06ec01541ac7d21f89877aebd0ef Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 21:10:23 +0800 Subject: [PATCH 46/59] implement string quote dialect --- common/ast/src/input.rs | 13 ++++++++++--- common/ast/src/parser/expr.rs | 12 +++++++++--- common/ast/src/util.rs | 8 +++++++- common/ast/tests/it/parser.rs | 4 ++-- common/ast/tests/it/testdata/expr.txt | 6 +++--- common/ast/tests/it/testdata/statement.txt | 10 +++++----- 6 files changed, 36 insertions(+), 17 deletions(-) diff --git a/common/ast/src/input.rs b/common/ast/src/input.rs index 446de97bb6c40..a296982286405 100644 --- a/common/ast/src/input.rs +++ b/common/ast/src/input.rs @@ -86,10 +86,17 @@ pub enum Dialect { } impl Dialect { - pub fn ident_quote(&self) -> char { + pub fn is_ident_quote(&self, c: char) -> bool { match self { - Dialect::MySQL => '`', - Dialect::PostgreSQL => '"', + Dialect::MySQL => c == '`', + Dialect::PostgreSQL => c == '"', + } + } + + pub fn is_string_quote(&self, c: char) -> bool { + match self { + Dialect::MySQL => c == '\'' || c == '"', + Dialect::PostgreSQL => c == '\'', } } } diff --git a/common/ast/src/parser/expr.rs b/common/ast/src/parser/expr.rs index cf42d16826e65..008fc15345cb3 100644 --- a/common/ast/src/parser/expr.rs +++ b/common/ast/src/parser/expr.rs @@ -1080,13 +1080,19 @@ pub fn literal_string(i: Input) -> IResult { QuotedString }, |token| { - if token.text().starts_with(i.1.ident_quote()) { - Err(ErrorKind::ExpectToken(QuotedString)) - } else { + if token + .text() + .chars() + .next() + .filter(|c| i.1.is_string_quote(*c)) + .is_some() + { let str = 
&token.text()[1..token.text().len() - 1]; let unescaped = unescape(str, '\'').ok_or(ErrorKind::Other("invalid escape or unicode"))?; Ok(unescaped) + } else { + Err(ErrorKind::ExpectToken(QuotedString)) } }, )(i) diff --git a/common/ast/src/util.rs b/common/ast/src/util.rs index 64f0536dc3928..af07ca771ccef 100644 --- a/common/ast/src/util.rs +++ b/common/ast/src/util.rs @@ -92,7 +92,13 @@ fn non_reserved_identifier( ), move |i| { match_token(QuotedString)(i).and_then(|(i2, token)| { - if token.text().starts_with(i.1.ident_quote()) { + if token + .text() + .chars() + .next() + .filter(|c| i.1.is_ident_quote(*c)) + .is_some() + { Ok((i2, Identifier { span: token.clone(), name: token.text()[1..token.text().len() - 1].to_string(), diff --git a/common/ast/tests/it/parser.rs b/common/ast/tests/it/parser.rs index bb936ce5143c3..22a4bd5165e3d 100644 --- a/common/ast/tests/it/parser.rs +++ b/common/ast/tests/it/parser.rs @@ -110,7 +110,7 @@ fn test_statement() { r#"select * from t4;"#, r#"select * from aa.bb;"#, r#"select * from a, b, c;"#, - r#"select * from a, b, c order by `db`.`a`.`c1`;"#, + r#"select * from a, b, c order by "db"."a"."c1";"#, r#"select * from a join b on a.a = b.a;"#, r#"select * from a left outer join b on a.a = b.a;"#, r#"select * from a right outer join b on a.a = b.a;"#, @@ -427,7 +427,7 @@ fn test_expr() { r#"1 - -(- - -1)"#, r#"1 + a * c.d"#, r#"number % 2"#, - r#"`t`:k1.k2"#, + r#""t":k1.k2"#, r#"col1 not between 1 and 2"#, r#"sum(col1)"#, r#""random"()"#, diff --git a/common/ast/tests/it/testdata/expr.txt b/common/ast/tests/it/testdata/expr.txt index 866cdc39aaf79..d79b72f952a20 100644 --- a/common/ast/tests/it/testdata/expr.txt +++ b/common/ast/tests/it/testdata/expr.txt @@ -1116,9 +1116,9 @@ BinaryOp { ---------- Input ---------- -`t`:k1.k2 +"t":k1.k2 ---------- Output --------- -`t`:k1.k2 +"t":k1.k2 ---------- AST ------------ MapAccess { span: [ @@ -1139,7 +1139,7 @@ MapAccess { column: Identifier { name: "t", quote: Some( - '`', + '"', ), span: QuotedString(0..3), }, diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index ccbc9152bb8c5..4e123876c272c 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -2027,9 +2027,9 @@ Query( ---------- Input ---------- -select * from a, b, c order by `db`.`a`.`c1`; +select * from a, b, c order by "db"."a"."c1"; ---------- Output --------- -SELECT * FROM a, b, c ORDER BY `db`.`a`.`c1` +SELECT * FROM a, b, c ORDER BY "db"."a"."c1" ---------- AST ------------ Query( Query { @@ -2134,7 +2134,7 @@ Query( Identifier { name: "db", quote: Some( - '`', + '"', ), span: QuotedString(31..35), }, @@ -2143,7 +2143,7 @@ Query( Identifier { name: "a", quote: Some( - '`', + '"', ), span: QuotedString(36..39), }, @@ -2151,7 +2151,7 @@ Query( column: Identifier { name: "c1", quote: Some( - '`', + '"', ), span: QuotedString(40..44), }, From 788c809caa09a05ae85a0eaa1539302fd5c69c13 Mon Sep 17 00:00:00 2001 From: leiysky Date: Wed, 10 Aug 2022 21:34:43 +0800 Subject: [PATCH 47/59] enable push down filter scan --- .../planner/format/display_rel_operator.rs | 15 ++++- .../heuristic/testdata/exchange.test | 20 +++--- .../optimizer/heuristic/testdata/join.test | 26 ++++---- .../heuristic/testdata/prune_columns.test | 22 +++---- .../optimizer/heuristic/testdata/select.test | 22 +++---- .../heuristic/testdata/subquery.test | 66 +++++++++---------- 6 files changed, 91 insertions(+), 80 deletions(-) diff --git 
a/query/src/sql/planner/format/display_rel_operator.rs b/query/src/sql/planner/format/display_rel_operator.rs index dd6cc3f2fd4d4..8ecdbbe3e7be5 100644 --- a/query/src/sql/planner/format/display_rel_operator.rs +++ b/query/src/sql/planner/format/display_rel_operator.rs @@ -231,8 +231,19 @@ pub fn format_physical_scan( let table = metadata.read().table(op.table_index).clone(); write!( f, - "Scan: {}.{}.{}", - &table.catalog, &table.database, &table.name + "Scan: {}.{}.{}, filters: [{}]", + &table.catalog, + &table.database, + &table.name, + op.push_down_predicates.as_ref().map_or_else( + || "".to_string(), + |predicates| { + predicates + .iter() + .map(|pred| format_scalar(metadata, pred)) + .join(", ") + } + ) ) } diff --git a/query/tests/it/sql/optimizer/heuristic/testdata/exchange.test b/query/tests/it/sql/optimizer/heuristic/testdata/exchange.test index 53e3abccb59eb..9f62af99eebc3 100644 --- a/query/tests/it/sql/optimizer/heuristic/testdata/exchange.test +++ b/query/tests/it/sql/optimizer/heuristic/testdata/exchange.test @@ -3,9 +3,9 @@ select * from numbers(1) t, numbers(2) t1 where t.number = t1.number Exchange(Merge) HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] Exchange(Hash): keys: [t.number (#0)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Exchange(Hash): keys: [t1.number (#1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] # Result of t1 join t is distributed on t.number @@ -15,11 +15,11 @@ Exchange(Merge) HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [t.number (#0)], join filters: [] HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] Exchange(Hash): keys: [t.number (#0)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Exchange(Hash): keys: [t1.number (#1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Exchange(Hash): keys: [t2.number (#2)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] select * from (select number as a, number+1 as b from numbers(1)) t, numbers(2) t1, numbers(3) t2 where a = t1.number and b = t2.number @@ -30,11 +30,11 @@ Exchange(Merge) HashJoin: INNER, build keys: [t1.number (#3)], probe keys: [t.a (#0)], join filters: [] Exchange(Hash): keys: [t.a (#0)] EvalScalar: [+(numbers.number (#0), 1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Exchange(Hash): keys: [t1.number (#3)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Exchange(Hash): keys: [t2.number (#4)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] select * from (select sum(number) as number from numbers(1) group by number) t, numbers(2) t1 where t.number = t1.number @@ -47,8 +47,8 @@ Exchange(Merge) Aggregate(Final): group items: [numbers.number (#0)], aggregate functions: [sum(number)] Aggregate(Partial): group items: [numbers.number (#0)], aggregate functions: [sum(number)] Exchange(Hash): keys: [numbers.number (#0)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Exchange(Hash): keys: [t1.number (#4)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] diff --git a/query/tests/it/sql/optimizer/heuristic/testdata/join.test b/query/tests/it/sql/optimizer/heuristic/testdata/join.test index 1db0eb5edb2b1..a078ce7e59652 100644 --- a/query/tests/it/sql/optimizer/heuristic/testdata/join.test +++ 
b/query/tests/it/sql/optimizer/heuristic/testdata/join.test @@ -3,16 +3,16 @@ select t.number from numbers(1) as t, numbers(1) as t1 where t.number = t1.numbe ---- Project: [number (#0)] HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number (#0)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] select t.number from numbers(1) as t, numbers(1) as t1 where t.number = t1.number and t.number = t1.number + 1 ---- Project: [number (#0)] HashJoin: INNER, build keys: [t1.number (#1), +(t1.number (#1), 1)], probe keys: [t.number (#0), t.number (#0)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Push predicates down through join @@ -21,9 +21,9 @@ select t.number from numbers(1) as t, numbers(1) as t1 where t.number > 1 and 1 Project: [number (#0)] CrossJoin Filter: [t.number (#0) > 1] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [t.number (#0) > 1] Filter: [1 < t1.number (#1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [1 < t1.number (#1)] select t.number from numbers(1) as t, numbers(1) as t1 where t.number + t1.number = 1 @@ -31,8 +31,8 @@ select t.number from numbers(1) as t, numbers(1) as t1 where t.number + t1.numbe Project: [number (#0)] Filter: [+(t.number (#0), t1.number (#1)) = 1] CrossJoin - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Incompatible join keys, cannot push into join @@ -41,8 +41,8 @@ select t.number from numbers(1) as t, numbers(1) as t1 where t.number = cast(t1. Project: [number (#0)] Filter: [t.number (#0) = CAST(t1.number (#1) AS VARCHAR)] CrossJoin - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Join multiple tables @@ -52,8 +52,8 @@ Project: [number (#0)] HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [t1.number (#1)], join filters: [] CrossJoin Filter: [t.number (#0) = 1] - Scan: default.system.numbers - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [t.number (#0) = 1] + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] diff --git a/query/tests/it/sql/optimizer/heuristic/testdata/prune_columns.test b/query/tests/it/sql/optimizer/heuristic/testdata/prune_columns.test index 64fa8e8b07fcf..b28210872703a 100644 --- a/query/tests/it/sql/optimizer/heuristic/testdata/prune_columns.test +++ b/query/tests/it/sql/optimizer/heuristic/testdata/prune_columns.test @@ -7,7 +7,7 @@ Project: [number (#0)] EvalScalar: [numbers.a (#0)] Project: [number (#0)] EvalScalar: [numbers.number (#0)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] # Prune unused columns from Aggregate @@ -19,7 +19,7 @@ Project: [number (#0)] EvalScalar: [group_item (#0)] Aggregate(Initial): group items: [numbers.number (#0)], aggregate functions: [] EvalScalar: [numbers.number (#0)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] # Prune unused columns for simple plan nodes (Project, Filter, Aggregate...) 
@@ -34,7 +34,7 @@ Limit: [1], Offset: [0] EvalScalar: [group_item (#0)] Aggregate(Initial): group items: [numbers.number (#0), numbers.number (#0), numbers.number (#0), numbers.number (#0)], aggregate functions: [] EvalScalar: [numbers.number (#0), numbers.number (#0), numbers.number (#0), numbers.number (#0), numbers.number (#0)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] # Prune unused columns for join plan nodes (LogicalInnerJoin ...) @@ -48,10 +48,10 @@ Project: [a (#1)] CrossJoin Project: [a (#1),b (#2),c (#3)] EvalScalar: [+(numbers.number (#0), 1), +(numbers.number (#0), 1), +(numbers.number (#0), 1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Project: [b (#11)] EvalScalar: [+(numbers.number (#9), 1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] # Prune unused columns for correlated query @@ -63,7 +63,7 @@ Project: [a (#1)] HashJoin: SINGLE, build keys: [], probe keys: [], join filters: [] Project: [a (#1)] EvalScalar: [+(numbers.number (#0), 1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Project: [COUNT(*) (#21)] EvalScalar: [COUNT(*) (#22)] Aggregate(Initial): group items: [], aggregate functions: [COUNT(*)] @@ -73,10 +73,10 @@ Project: [a (#1)] CrossJoin Project: [a (#6),b (#7),c (#8)] EvalScalar: [+(numbers.number (#5), 1), +(numbers.number (#5), 1), +(numbers.number (#5), 1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Project: [b (#16)] EvalScalar: [+(numbers.number (#14), 1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] # Prune unused columns with order by @@ -85,7 +85,7 @@ select name from system.functions order by example Project: [name (#0)] EvalScalar: [functions.name (#0)] Sort: [example (#7) ASC] - Scan: default.system.functions + Scan: default.system.functions, filters: [] # Prune unused columns with cross join @@ -95,13 +95,13 @@ Project: [number (#0)] EvalScalar: [t.number (#0)] Filter: [subquery_3 (#3)] CrossJoin - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Project: [subquery (#3)] EvalScalar: [count(*) (#2) = 1] Aggregate(Initial): group items: [], aggregate functions: [count(*)] Limit: [1], Offset: [0] Project: [number (#1)] EvalScalar: [numbers.number (#1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] diff --git a/query/tests/it/sql/optimizer/heuristic/testdata/select.test b/query/tests/it/sql/optimizer/heuristic/testdata/select.test index e2237b9d7ab01..728c24262db10 100644 --- a/query/tests/it/sql/optimizer/heuristic/testdata/select.test +++ b/query/tests/it/sql/optimizer/heuristic/testdata/select.test @@ -1,12 +1,12 @@ select * from numbers(1) ---- -Scan: default.system.numbers +Scan: default.system.numbers, filters: [] select * from (select * from numbers(1)) as t1 where number = 1 ---- Filter: [t1.number (#0) = 1] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [t1.number (#0) = 1] # `b = 1` can not be pushed down @@ -14,54 +14,54 @@ select * from (select number as a, number + 1 as b from numbers(1)) as t1 where ---- Filter: [t1.a (#0) = 1, t1.b (#1) = 1] EvalScalar: [+(numbers.number (#0), 1)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] select * from (select number as a, number + 1 as b from numbers(1)) as t1 where a = 1 ---- EvalScalar: [+(numbers.number (#0), 1)] Filter: [t1.a (#0) = 1] - Scan: default.system.numbers + Scan: default.system.numbers, filters: 
[t1.a (#0) = 1] select * from numbers(1) where number = pow(1, 1 + 1) ---- Filter: [numbers.number (#0) = 1] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [numbers.number (#0) = 1] select * from numbers(1) where TRUE and 1 = 1 ---- -Scan: default.system.numbers +Scan: default.system.numbers, filters: [] select * from numbers(1) where number = 0 and false ---- Filter: [false] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [false] select * from numbers(1) where number = 0 and null ---- Filter: [false] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [false] # If there is only one conjunction and the value is null, then we won't rewrite it select * from numbers(1) where null ---- Filter: [NULL] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [NULL] select a from (select number as a, number as b from numbers(1)) ---- -Scan: default.system.numbers +Scan: default.system.numbers, filters: [] select a from (select number as a, number+1 as b from numbers(1)) ---- Project: [number (#0)] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] diff --git a/query/tests/it/sql/optimizer/heuristic/testdata/subquery.test b/query/tests/it/sql/optimizer/heuristic/testdata/subquery.test index ea3b6c83b8168..db6c9fa10c1d3 100644 --- a/query/tests/it/sql/optimizer/heuristic/testdata/subquery.test +++ b/query/tests/it/sql/optimizer/heuristic/testdata/subquery.test @@ -5,17 +5,17 @@ Project: [number (#0)] Filter: [t.number (#0) = CAST(if(is_null(scalar_subquery_4 (#4)), 0, scalar_subquery_4 (#4)) AS BIGINT UNSIGNED)] HashJoin: SINGLE, build keys: [subquery_6 (#6)], probe keys: [subquery_0 (#0)], join filters: [] CrossJoin - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] Project: [COUNT(*) (#4),number (#6)] EvalScalar: [COUNT(*) (#5)] Aggregate(Final): group items: [subquery_6 (#6)], aggregate functions: [COUNT(*)] Aggregate(Partial): group items: [subquery_6 (#6)], aggregate functions: [COUNT(*)] HashJoin: INNER, build keys: [t2.number (#2)], probe keys: [subquery_6 (#6)], join filters: [] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] CrossJoin - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Exists correlated subquery with joins @@ -25,9 +25,9 @@ Project: [number (#0)] Filter: [(3 (#3)) OR (t.number (#0) > 1)] HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_2 (#2)], join filters: [] HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [subquery_2 (#2)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Uncorrelated subquery @@ -35,7 +35,7 @@ select t.number from numbers(1) as t where exists (select * from numbers(1) wher ---- Project: [number (#0)] CrossJoin - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Project: [subquery (#3)] Filter: [subquery_3 (#3)] EvalScalar: [count(*) (#2) = 1] @@ -43,7 +43,7 @@ Project: [number (#0)] Aggregate(Partial): group items: [], aggregate functions: [count(*)] Limit: [1], Offset: [0] Filter: [numbers.number (#1) = 0] - Scan: default.system.numbers + Scan: default.system.numbers, filters: 
[numbers.number (#1) = 0] # Uncorrelated subquery @@ -52,9 +52,9 @@ select t.number from numbers(1) as t where number = (select * from numbers(1) wh Project: [number (#0)] Filter: [t.number (#0) = scalar_subquery_1 (#1)] HashJoin: SINGLE, build keys: [], probe keys: [], join filters: [] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] Filter: [numbers.number (#1) = 0] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [numbers.number (#1) = 0] # Correlated subquery can be translated to SemiJoin @@ -62,8 +62,8 @@ select t.number from numbers(1) as t where exists (select * from numbers(1) wher ---- Project: [number (#0)] HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Correlated subquery can be translated to AntiJoin @@ -73,17 +73,17 @@ Project: [number (#0)] Filter: [not(3 (#3))] HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_2 (#2)], join filters: [] HashJoin: INNER, build keys: [numbers.number (#1)], probe keys: [subquery_2 (#2)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] select * from numbers(1) as t where exists (select number as a from numbers(1) where number = t.number) ---- Project: [number (#0)] HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Exists with different kinds of predicate @@ -92,9 +92,9 @@ select t.number from numbers(1) as t where exists (select * from numbers(1) wher Project: [number (#0)] HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] Filter: [t.number (#0) < 10] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [t.number (#0) < 10] Filter: [numbers.number (#1) = 0] - Scan: default.system.numbers + Scan: default.system.numbers, filters: [numbers.number (#1) = 0] # Exists with non-equi predicate @@ -102,8 +102,8 @@ select t.number from numbers(1) as t where exists (select * from numbers(1) wher ---- Project: [number (#0)] HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [t.number (#0) < numbers.number (#1)] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Exists project required columns @@ -111,8 +111,8 @@ select t.number from numbers(1) as t where exists (select number as a, number as ---- Project: [number (#0)] HashJoin: SEMI, build keys: [numbers.number (#1)], probe keys: [t.number (#0)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Push down filter through CrossApply @@ -122,15 +122,15 @@ Project: [number (#0)] Filter: [CAST(if(is_null(scalar_subquery_3 (#3)), 0, scalar_subquery_3 (#3)) AS BIGINT UNSIGNED)] HashJoin: SINGLE, build keys: [subquery_5 (#5)], probe keys: [subquery_0 (#0)], join filters: [] HashJoin: INNER, build keys: [t1.number (#1)], probe keys: [t.number 
(#0)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] Project: [COUNT(*) = 1 (#3),number (#5)] EvalScalar: [COUNT(*) (#4) = 1] Aggregate(Final): group items: [subquery_5 (#5)], aggregate functions: [COUNT(*)] Aggregate(Partial): group items: [subquery_5 (#5)], aggregate functions: [COUNT(*)] HashJoin: INNER, build keys: [numbers.number (#2)], probe keys: [subquery_5 (#5)], join filters: [] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] # Semi join with other conditions @@ -141,10 +141,10 @@ Project: [number (#0)] HashJoin: MARK, build keys: [subquery_0 (#0)], probe keys: [subquery_3 (#3)], join filters: [] Filter: [subquery_3 (#3) < t1.number (#2)] CrossJoin - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] HashJoin: SEMI, build keys: [], probe keys: [], join filters: [t.number (#0) > t1.number (#1)] - Scan: default.system.numbers - Scan: default.system.numbers + Scan: default.system.numbers, filters: [] + Scan: default.system.numbers, filters: [] From 61ea3718d43f3d4abb54996883ff66ebe99b1dbd Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 21:49:00 +0800 Subject: [PATCH 48/59] fix tests --- common/expression/tests/it/parser.rs | 3 ++- common/functions-v2/tests/it/scalars/parser.rs | 3 ++- query/tests/it/sql/planner/semantic/name_resolution.rs | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/common/expression/tests/it/parser.rs b/common/expression/tests/it/parser.rs index 4119b72cef091..cb94f087a0d2e 100644 --- a/common/expression/tests/it/parser.rs +++ b/common/expression/tests/it/parser.rs @@ -17,6 +17,7 @@ use common_ast::parser::parse_expr; use common_ast::parser::token::Token; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_expression::types::DataType; use common_expression::Literal; use common_expression::RawExpr; @@ -25,7 +26,7 @@ use common_expression::Span; pub fn parse_raw_expr(text: &str, columns: &[(&str, DataType)]) -> RawExpr { let backtrace = Backtrace::new(); let tokens = tokenize_sql(text).unwrap(); - let expr = parse_expr(&tokens, &backtrace).unwrap(); + let expr = parse_expr(&tokens, Dialect::PostgreSQL, &backtrace).unwrap(); transform_expr(expr, columns) } diff --git a/common/functions-v2/tests/it/scalars/parser.rs b/common/functions-v2/tests/it/scalars/parser.rs index 82108b2bc3e0c..c43b6b7f0fb8a 100644 --- a/common/functions-v2/tests/it/scalars/parser.rs +++ b/common/functions-v2/tests/it/scalars/parser.rs @@ -17,6 +17,7 @@ use common_ast::parser::parse_expr; use common_ast::parser::token::Token; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_expression::types::DataType; use common_expression::Literal; use common_expression::RawExpr; @@ -25,7 +26,7 @@ use common_expression::Span; pub fn parse_raw_expr(text: &str, columns: &[(&str, DataType)]) -> RawExpr { let backtrace = Backtrace::new(); let tokens = tokenize_sql(text).unwrap(); - let expr = parse_expr(&tokens, &backtrace).unwrap(); + let expr = parse_expr(&tokens, Dialect::PostgreSQL, &backtrace).unwrap(); transform_expr(expr, columns) } diff --git a/query/tests/it/sql/planner/semantic/name_resolution.rs 
b/query/tests/it/sql/planner/semantic/name_resolution.rs index d366e87407e53..f31d7f32736b5 100644 --- a/query/tests/it/sql/planner/semantic/name_resolution.rs +++ b/query/tests/it/sql/planner/semantic/name_resolution.rs @@ -19,6 +19,7 @@ use common_ast::parser::token::TokenKind; use common_ast::parser::tokenize_sql; use common_ast::walk_expr_mut; use common_ast::Backtrace; +use common_ast::Dialect; use databend_query::sql::normalize_identifier; use databend_query::sql::IdentifierNormalizer; use databend_query::sql::NameResolutionContext; @@ -124,7 +125,7 @@ fn test_normalize_identifier_unquoted_case_sensitive() { fn test_normalize_identifiers_in_expr() { let tokens = tokenize_sql("exists(select func(\"T\".A+1) as B)").unwrap(); let backtrace = Backtrace::new(); - let mut expr = parse_expr(&tokens, &backtrace).unwrap(); + let mut expr = parse_expr(&tokens, Dialect::PostgreSQL, &backtrace).unwrap(); let ctx = NameResolutionContext::default(); let mut normalizer = IdentifierNormalizer { ctx: &ctx }; From 9ef92d93dc544144aed4d0bc243ac0c45d8f0005 Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 21:50:13 +0800 Subject: [PATCH 49/59] fix tests --- query/tests/it/sql/optimizer/heuristic/exchange.rs | 3 ++- query/tests/it/sql/optimizer/heuristic/mod.rs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/query/tests/it/sql/optimizer/heuristic/exchange.rs b/query/tests/it/sql/optimizer/heuristic/exchange.rs index 0b075b9dec19d..3d4465e73fc7c 100644 --- a/query/tests/it/sql/optimizer/heuristic/exchange.rs +++ b/query/tests/it/sql/optimizer/heuristic/exchange.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_base::base::tokio; use common_catalog::table_context::TableContext; use common_exception::ErrorCode; @@ -39,7 +40,7 @@ use crate::tests::create_query_context; async fn run_cluster_test(ctx: Arc, suite: &Suite) -> Result { let tokens = tokenize_sql(&suite.query)?; let bt = Backtrace::new(); - let (stmt, _) = parse_sql(&tokens, &bt)?; + let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL, &bt)?; let binder = Binder::new( ctx.clone(), ctx.get_catalogs(), diff --git a/query/tests/it/sql/optimizer/heuristic/mod.rs b/query/tests/it/sql/optimizer/heuristic/mod.rs index 350e8d0e4152c..94feaeca75041 100644 --- a/query/tests/it/sql/optimizer/heuristic/mod.rs +++ b/query/tests/it/sql/optimizer/heuristic/mod.rs @@ -25,6 +25,7 @@ use std::sync::Arc; use common_ast::parser::parse_sql; use common_ast::parser::tokenize_sql; use common_ast::Backtrace; +use common_ast::Dialect; use common_exception::ErrorCode; use common_exception::Result; use databend_query::sessions::QueryContext; @@ -47,7 +48,7 @@ pub(super) struct Suite { async fn run_test(ctx: Arc, suite: &Suite) -> Result { let tokens = tokenize_sql(&suite.query)?; let bt = Backtrace::new(); - let (stmt, _) = parse_sql(&tokens, &bt)?; + let (stmt, _) = parse_sql(&tokens, Dialect::PostgreSQL, &bt)?; let binder = Binder::new( ctx.clone(), ctx.get_catalogs(), From 6cedcba9555d365c1e68c047f67a6a840059e42e Mon Sep 17 00:00:00 2001 From: leiysky Date: Wed, 10 Aug 2022 21:50:01 +0800 Subject: [PATCH 50/59] fix tests --- .../suites/base/04_explain/04_0002_explain_v2 | 19 +++++-------------- .../04_explain/04_0002_explain_v2.result | 10 +++++----- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/tests/logictest/suites/base/04_explain/04_0002_explain_v2 
b/tests/logictest/suites/base/04_explain/04_0002_explain_v2 index 5b898a352408b..d135f7fec7347 100644 --- a/tests/logictest/suites/base/04_explain/04_0002_explain_v2 +++ b/tests/logictest/suites/base/04_explain/04_0002_explain_v2 @@ -1,9 +1,5 @@ -- TODO(need fix) -onlyif mysql -statement ok -set enable_planner_v2 = 1; - onlyif mysql statement ok drop table if exists t1 all; @@ -27,7 +23,7 @@ explain select t1.a from t1 where a > 0; ---- Project: [a (#0)] └── Filter: [t1.a (#0) > 0] - └── Scan: default.default.t1 + └── Scan: default.default.t1, filters: [t1.a (#0) > 0] onlyif mysql statement query T @@ -36,8 +32,8 @@ explain select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or (t1.a = t2.a an ---- Filter: [(t1.a (#0) > 3) OR ((t2.a (#2) > 5) AND (t1.a (#0) > 1))] └── HashJoin: INNER, build keys: [t2.a (#2)], probe keys: [t1.a (#0)], join filters: [] - ├── Scan: default.default.t1 - └── Scan: default.default.t2 + ├── Scan: default.default.t1, filters: [] + └── Scan: default.default.t2, filters: [] onlyif mysql statement query T @@ -45,8 +41,8 @@ explain select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or (t1.a = t2.a); ---- HashJoin: INNER, build keys: [t2.a (#2)], probe keys: [t1.a (#0)], join filters: [] -├── Scan: default.default.t1 -└── Scan: default.default.t2 +├── Scan: default.default.t1, filters: [] +└── Scan: default.default.t2, filters: [] onlyif mysql statement ok @@ -55,8 +51,3 @@ drop table t1; onlyif mysql statement ok drop table t2; - -onlyif mysql -statement ok -set enable_planner_v2 = 0; - diff --git a/tests/suites/0_stateless/04_explain/04_0002_explain_v2.result b/tests/suites/0_stateless/04_explain/04_0002_explain_v2.result index 7f63c73bbdfa4..27b8e24c383e4 100644 --- a/tests/suites/0_stateless/04_explain/04_0002_explain_v2.result +++ b/tests/suites/0_stateless/04_explain/04_0002_explain_v2.result @@ -1,11 +1,11 @@ ===Explain=== Project: [a (#0)] └── Filter: [t1.a (#0) > 0] - └── Scan: default.default.t1 + └── Scan: default.default.t1, filters: [t1.a (#0) > 0] Filter: [(t1.a (#0) > 3) OR ((t2.a (#2) > 5) AND (t1.a (#0) > 1))] └── HashJoin: INNER, build keys: [t2.a (#2)], probe keys: [t1.a (#0)], join filters: [] - ├── Scan: default.default.t1 - └── Scan: default.default.t2 + ├── Scan: default.default.t1, filters: [] + └── Scan: default.default.t2, filters: [] HashJoin: INNER, build keys: [t2.a (#2)], probe keys: [t1.a (#0)], join filters: [] -├── Scan: default.default.t1 -└── Scan: default.default.t2 +├── Scan: default.default.t1, filters: [] +└── Scan: default.default.t2, filters: [] From c54c27a3151b61c75f492c3fccd65bf9f41cbf30 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 10 Aug 2022 22:10:37 +0800 Subject: [PATCH 51/59] feat(tests): fix tests --- tests/logictest/logictest.py | 2 +- tests/logictest/suites/base/20+_others/20_0001_planner_v2 | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/logictest/logictest.py b/tests/logictest/logictest.py index f3eca6b009779..88e31b56d73da 100644 --- a/tests/logictest/logictest.py +++ b/tests/logictest/logictest.py @@ -100,7 +100,7 @@ def __init__(self, message, errorType, runner): self.runner = runner def __str__(self): - return f"Ruuner: {self.runner}\nErrorType: {self.errorType}\nMessage: {self.message}" + return f"Runner: {self.runner}\nErrorType: {self.errorType}\nMessage: {self.message}" class Statement: diff --git a/tests/logictest/suites/base/20+_others/20_0001_planner_v2 b/tests/logictest/suites/base/20+_others/20_0001_planner_v2 index 
8d205f9a8a81f..c9769c62d369a 100644 --- a/tests/logictest/suites/base/20+_others/20_0001_planner_v2 +++ b/tests/logictest/suites/base/20+_others/20_0001_planner_v2 @@ -1,5 +1,6 @@ - +statement ok +set enable_planner_v2 = 1; statement query I @@ -1670,7 +1671,8 @@ statement ok insert into t3 values(1); - +statement ok +set enable_planner_v2 = 1; statement query I @@ -1861,4 +1863,3 @@ drop table t2; statement ok set enable_planner_v2 = 0; - From 947b7afb3329d208ec0460859558f43f36e5e5fd Mon Sep 17 00:00:00 2001 From: leiysky Date: Wed, 10 Aug 2022 22:20:19 +0800 Subject: [PATCH 52/59] fix tests --- common/storages/hive/src/hive_table.rs | 6 ------ query/tests/it/sql/planner/format/mod.rs | 8 ++++---- .../04_explain/04_0002_explain_v2_cluster.result | 10 +++++----- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/common/storages/hive/src/hive_table.rs b/common/storages/hive/src/hive_table.rs index 29c7a6852df0c..bb144ddd91b35 100644 --- a/common/storages/hive/src/hive_table.rs +++ b/common/storages/hive/src/hive_table.rs @@ -213,12 +213,6 @@ impl HiveTable { if let Some(partition_keys) = &self.table_options.partition_keys { if !partition_keys.is_empty() { let filter_expression = if let Some(extras) = push_downs { - if extras.filters.len() > 1 { - return Err(ErrorCode::UnImplement(format!( - "more than one filters, {:?}", - extras.filters - ))); - }; extras.filters.get(0).cloned() } else { None diff --git a/query/tests/it/sql/planner/format/mod.rs b/query/tests/it/sql/planner/format/mod.rs index 3516eefdf05e9..09825a69e8892 100644 --- a/query/tests/it/sql/planner/format/mod.rs +++ b/query/tests/it/sql/planner/format/mod.rs @@ -158,15 +158,15 @@ fn test_format() { let result = tree.format_indent().unwrap(); let expect = r#"HashJoin: INNER, build keys: [plus(col1 (#0), 123)], probe keys: [col2 (#1)], join filters: [] Filter: [true] - Scan: catalog.database.table - Scan: catalog.database.table + Scan: catalog.database.table, filters: [] + Scan: catalog.database.table, filters: [] "#; assert_eq!(result.as_str(), expect); let pretty_result = tree.format_pretty().unwrap(); let pretty_expect = r#"HashJoin: INNER, build keys: [plus(col1 (#0), 123)], probe keys: [col2 (#1)], join filters: [] ├── Filter: [true] -│ └── Scan: catalog.database.table -└── Scan: catalog.database.table +│ └── Scan: catalog.database.table, filters: [] +└── Scan: catalog.database.table, filters: [] "#; assert_eq!(pretty_result.as_str(), pretty_expect); } diff --git a/tests/suites/0_stateless/04_explain/04_0002_explain_v2_cluster.result b/tests/suites/0_stateless/04_explain/04_0002_explain_v2_cluster.result index 493bad17f66a1..dbd001adcca9b 100644 --- a/tests/suites/0_stateless/04_explain/04_0002_explain_v2_cluster.result +++ b/tests/suites/0_stateless/04_explain/04_0002_explain_v2_cluster.result @@ -2,17 +2,17 @@ Exchange(Merge) └── Project: [a (#0)] └── Filter: [t1.a (#0) > 0] - └── Scan: default.default.t1 + └── Scan: default.default.t1, filters: [t1.a (#0) > 0] Exchange(Merge) └── Filter: [(t1.a (#0) > 3) OR ((t2.a (#2) > 5) AND (t1.a (#0) > 1))] └── HashJoin: INNER, build keys: [t2.a (#2)], probe keys: [t1.a (#0)], join filters: [] ├── Exchange(Hash): keys: [t1.a (#0)] - │ └── Scan: default.default.t1 + │ └── Scan: default.default.t1, filters: [] └── Exchange(Hash): keys: [t2.a (#2)] - └── Scan: default.default.t2 + └── Scan: default.default.t2, filters: [] Exchange(Merge) └── HashJoin: INNER, build keys: [t2.a (#2)], probe keys: [t1.a (#0)], join filters: [] ├── Exchange(Hash): keys: [t1.a (#0)] - │ └── 
Scan: default.default.t1 + │ └── Scan: default.default.t1, filters: [] └── Exchange(Hash): keys: [t2.a (#2)] - └── Scan: default.default.t2 + └── Scan: default.default.t2, filters: [] From cce0bc2bc9500060a9a564c2d1f5a7bdd80e9e78 Mon Sep 17 00:00:00 2001 From: lichuang Date: Wed, 10 Aug 2022 23:19:37 +0800 Subject: [PATCH 53/59] refactor by comment --- common/ast/src/ast/statements/statement.rs | 4 ++-- common/ast/src/parser/statement.rs | 6 +++--- common/ast/tests/it/testdata/statement.txt | 6 +++--- query/src/interpreters/interpreter_factory_v2.rs | 2 +- query/src/interpreters/interpreter_share_alter_tenants.rs | 6 +++--- query/src/sql/planner/binder/ddl/share.rs | 6 +++--- query/src/sql/planner/binder/mod.rs | 2 +- query/src/sql/planner/format/display_plan.rs | 2 +- query/src/sql/planner/plans/mod.rs | 6 +++--- query/src/sql/planner/plans/share.rs | 6 +++--- 10 files changed, 23 insertions(+), 23 deletions(-) diff --git a/common/ast/src/ast/statements/statement.rs b/common/ast/src/ast/statements/statement.rs index dce573b9ad341..a37bfa9a74bd1 100644 --- a/common/ast/src/ast/statements/statement.rs +++ b/common/ast/src/ast/statements/statement.rs @@ -163,7 +163,7 @@ pub enum Statement<'a> { DropShare(DropShareStmt<'a>), GrantShareObject(GrantShareObjectStmt<'a>), RevokeShareObject(RevokeShareObjectStmt<'a>), - AlterShareAccounts(AlterShareTenantsStmt<'a>), + AlterShareTenants(AlterShareTenantsStmt<'a>), } #[derive(Debug, Clone, PartialEq)] @@ -369,7 +369,7 @@ impl<'a> Display for Statement<'a> { Statement::DropShare(stmt) => write!(f, "{stmt}")?, Statement::GrantShareObject(stmt) => write!(f, "{stmt}")?, Statement::RevokeShareObject(stmt) => write!(f, "{stmt}")?, - Statement::AlterShareAccounts(stmt) => write!(f, "{stmt}")?, + Statement::AlterShareTenants(stmt) => write!(f, "{stmt}")?, } Ok(()) } diff --git a/common/ast/src/parser/statement.rs b/common/ast/src/parser/statement.rs index c0649d2c2567d..083c44b899979 100644 --- a/common/ast/src/parser/statement.rs +++ b/common/ast/src/parser/statement.rs @@ -794,12 +794,12 @@ pub fn statement(i: Input) -> IResult { }) }, ); - let alter_share_accounts = map( + let alter_share_tenants = map( rule! { ALTER ~ SHARE ~ (IF ~ EXISTS )? 
~ #ident ~ #alter_add_share_accounts ~ TENANTS ~ Eq ~ #comma_separated_list1(ident) }, |(_, _, opt_if_exists, share, is_add, _, _, tenants)| { Statement::AlterShareTenants(AlterShareTenantsStmt { share, if_exists: opt_if_exists.is_some(), is_add, @@ -899,7 +899,7 @@ pub fn statement(i: Input) -> IResult { | #drop_share: "`DROP SHARE [IF EXISTS] <share_name>`" | #grant_share_object: "`GRANT { USAGE | SELECT | REFERENCE_USAGE } ON { DATABASE db | TABLE db.table } TO SHARE <share_name>`" | #revoke_share_object: "`REVOKE { USAGE | SELECT | REFERENCE_USAGE } ON { DATABASE db | TABLE db.table } FROM SHARE <share_name>`" -    | #alter_share_accounts: "`ALTER SHARE [IF EXISTS] <share_name> { ADD | REMOVE } TENANTS = tenant [, tenant, ...]`" +    | #alter_share_tenants: "`ALTER SHARE [IF EXISTS] <share_name> { ADD | REMOVE } TENANTS = tenant [, tenant, ...]`" ), )); diff --git a/common/ast/tests/it/testdata/statement.txt b/common/ast/tests/it/testdata/statement.txt index 5a526d9bded48..ddcd2626dbb81 100644 --- a/common/ast/tests/it/testdata/statement.txt +++ b/common/ast/tests/it/testdata/statement.txt @@ -6095,7 +6095,7 @@ ALTER SHARE a ADD TENANTS = b,c; ---------- Output --------- ALTER SHARE a ADD TENANTS = b,c ---------- AST ------------ -AlterShareAccounts( +AlterShareTenants( AlterShareTenantsStmt { share: Identifier { name: "a", @@ -6125,7 +6125,7 @@ ALTER SHARE IF EXISTS a ADD TENANTS = b,c; ---------- Output --------- ALTER SHARE IF EXISTS a ADD TENANTS = b,c ---------- AST ------------ -AlterShareAccounts( +AlterShareTenants( AlterShareTenantsStmt { share: Identifier { name: "a", @@ -6155,7 +6155,7 @@ ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c; ---------- Output --------- ALTER SHARE IF EXISTS a REMOVE TENANTS = b,c ---------- AST ------------ -AlterShareAccounts( +AlterShareTenants( AlterShareTenantsStmt { share: Identifier { name: "a", diff --git a/query/src/interpreters/interpreter_factory_v2.rs b/query/src/interpreters/interpreter_factory_v2.rs index 3ac5668b4e16a..e8063bd14627c 100644 --- a/query/src/interpreters/interpreter_factory_v2.rs +++ b/query/src/interpreters/interpreter_factory_v2.rs @@ -267,7 +267,7 @@ impl InterpreterFactoryV2 { ctx, *p.clone(), )?)), -            Plan::AlterShareAccounts(p) => Ok(Arc::new(AlterShareTenantsInterpreter::try_create( +            Plan::AlterShareTenants(p) => Ok(Arc::new(AlterShareTenantsInterpreter::try_create( ctx, *p.clone(), )?)), diff --git a/query/src/interpreters/interpreter_share_alter_tenants.rs b/query/src/interpreters/interpreter_share_alter_tenants.rs index 3c05e26c46f3e..1e023ee9a52d5 100644 --- a/query/src/interpreters/interpreter_share_alter_tenants.rs +++ b/query/src/interpreters/interpreter_share_alter_tenants.rs @@ -26,15 +26,15 @@ use common_streams::SendableDataBlockStream; use crate::interpreters::Interpreter; use crate::sessions::QueryContext; use crate::sessions::TableContext; -use crate::sql::plans::share::AlterShareAccountsPlan; +use crate::sql::plans::share::AlterShareTenantsPlan; pub struct AlterShareTenantsInterpreter { ctx: Arc<QueryContext>, -    plan: AlterShareAccountsPlan, +    plan: AlterShareTenantsPlan, } impl AlterShareTenantsInterpreter { -    pub fn try_create(ctx: Arc<QueryContext>, plan: AlterShareAccountsPlan) -> Result<Self> { +    pub fn try_create(ctx: Arc<QueryContext>, plan: AlterShareTenantsPlan) -> Result<Self> { Ok(AlterShareTenantsInterpreter { ctx, plan }) } } diff --git a/query/src/sql/planner/binder/ddl/share.rs b/query/src/sql/planner/binder/ddl/share.rs index eb07d3ac7536d..809d2083995a5 100644 --- a/query/src/sql/planner/binder/ddl/share.rs +++ 
b/query/src/sql/planner/binder/ddl/share.rs @@ -19,7 +19,7 @@ use itertools::Itertools; use crate::sessions::TableContext; use crate::sql::binder::Binder; use crate::sql::normalize_identifier; -use crate::sql::plans::AlterShareAccountsPlan; +use crate::sql::plans::AlterShareTenantsPlan; use crate::sql::plans::CreateSharePlan; use crate::sql::plans::DropSharePlan; use crate::sql::plans::GrantShareObjectPlan; @@ -117,12 +117,12 @@ impl<'a> Binder { let share = normalize_identifier(share, &self.name_resolution_ctx).name; -        let plan = AlterShareAccountsPlan { +        let plan = AlterShareTenantsPlan { share, if_exists: *if_exists, is_add: *is_add, accounts: tenants.iter().map(|v| v.to_string()).collect_vec(), }; -        Ok(Plan::AlterShareAccounts(Box::new(plan))) +        Ok(Plan::AlterShareTenants(Box::new(plan))) } } diff --git a/query/src/sql/planner/binder/mod.rs b/query/src/sql/planner/binder/mod.rs index b7742b7b38b1c..cdf476a32b709 100644 --- a/query/src/sql/planner/binder/mod.rs +++ b/query/src/sql/planner/binder/mod.rs @@ -311,7 +311,7 @@ impl<'a> Binder { Statement::RevokeShareObject(stmt) => { self.bind_revoke_share_object(stmt).await? } -            Statement::AlterShareAccounts(stmt) => { +            Statement::AlterShareTenants(stmt) => { self.bind_alter_share_accounts(stmt).await? } }; diff --git a/query/src/sql/planner/format/display_plan.rs b/query/src/sql/planner/format/display_plan.rs index d0f2c0e5bdaff..eeee74927a52c 100644 --- a/query/src/sql/planner/format/display_plan.rs +++ b/query/src/sql/planner/format/display_plan.rs @@ -98,7 +98,7 @@ impl Plan { Plan::DropShare(p) => Ok(format!("{:?}", p)), Plan::GrantShareObject(p) => Ok(format!("{:?}", p)), Plan::RevokeShareObject(p) => Ok(format!("{:?}", p)), -            Plan::AlterShareAccounts(p) => Ok(format!("{:?}", p)), +            Plan::AlterShareTenants(p) => Ok(format!("{:?}", p)), } } } diff --git a/query/src/sql/planner/plans/mod.rs b/query/src/sql/planner/plans/mod.rs index ae0078d3b4d9a..040581de6b0b4 100644 --- a/query/src/sql/planner/plans/mod.rs +++ b/query/src/sql/planner/plans/mod.rs @@ -203,7 +203,7 @@ pub enum Plan { DropShare(Box<DropSharePlan>), GrantShareObject(Box<GrantShareObjectPlan>), RevokeShareObject(Box<RevokeShareObjectPlan>), -    AlterShareAccounts(Box<AlterShareAccountsPlan>), +    AlterShareTenants(Box<AlterShareTenantsPlan>), } #[derive(Clone)] @@ -278,7 +278,7 @@ impl Display for Plan { Plan::DropShare(_) => write!(f, "DropShare"), Plan::GrantShareObject(_) => write!(f, "GrantShareObject"), Plan::RevokeShareObject(_) => write!(f, "RevokeShareObject"), -            Plan::AlterShareAccounts(_) => write!(f, "AlterShareAccounts"), +            Plan::AlterShareTenants(_) => write!(f, "AlterShareTenants"), } } } @@ -345,7 +345,7 @@ impl Plan { Plan::DropShare(plan) => plan.schema(), Plan::GrantShareObject(plan) => plan.schema(), Plan::RevokeShareObject(plan) => plan.schema(), -            Plan::AlterShareAccounts(plan) => plan.schema(), +            Plan::AlterShareTenants(plan) => plan.schema(), } } } diff --git a/query/src/sql/planner/plans/share.rs b/query/src/sql/planner/plans/share.rs index 5a836baed7715..b29dcbdccfbba 100644 --- a/query/src/sql/planner/plans/share.rs +++ b/query/src/sql/planner/plans/share.rs @@ -106,16 +106,16 @@ impl RevokeShareObjectPlan { } } -// Alter Share Accounts Plan +// Alter Share Tenants Plan #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] -pub struct AlterShareAccountsPlan { +pub struct AlterShareTenantsPlan { pub share: String, pub if_exists: bool, pub accounts: Vec<String>, pub is_add: bool, } -impl AlterShareAccountsPlan { +impl AlterShareTenantsPlan { pub fn schema(&self) -> DataSchemaRef { Arc::new(DataSchema::empty()) } From 
d8c99fb9d1cb679fe1c0e02e7296fe457728de24 Mon Sep 17 00:00:00 2001 From: andylokandy Date: Wed, 10 Aug 2022 23:32:20 +0800 Subject: [PATCH 54/59] fix tests --- common/ast/src/input.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/ast/src/input.rs b/common/ast/src/input.rs index a296982286405..bd2ed7912064c 100644 --- a/common/ast/src/input.rs +++ b/common/ast/src/input.rs @@ -89,7 +89,8 @@ impl Dialect { pub fn is_ident_quote(&self, c: char) -> bool { match self { Dialect::MySQL => c == '`', - Dialect::PostgreSQL => c == '"', + // TODO: remove '`' quote support once mysql handler correctly set mysql dialect. + Dialect::PostgreSQL => c == '"' || c == '`', } } From c9c0b37b6c631508a7cc78e607da9ac22c6e835c Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Wed, 10 Aug 2022 18:06:16 -0700 Subject: [PATCH 55/59] Update common/expression/src/values.rs Co-authored-by: xudong.w --- common/expression/src/values.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/expression/src/values.rs b/common/expression/src/values.rs index 9b8f42f035831..3ba896341c76b 100755 --- a/common/expression/src/values.rs +++ b/common/expression/src/values.rs @@ -708,8 +708,9 @@ impl Column { Column::Nullable(col) => { let arrow_array = col.column.as_arrow(); match arrow_array.data_type() { - ArrowType::Null => arrow_array, - ArrowType::Extension(_, t, _) if **t == ArrowType::Null => arrow_array, + ArrowType::Null | ArrowType::Extension(_, t, _) if **t == ArrowType::Null => { + arrow_array + } _ => arrow_array.with_validity(Some(col.validity.clone())), } } From 6d3432a5f98e9d5420428babfd3cb2be579aad3b Mon Sep 17 00:00:00 2001 From: elijah Date: Thu, 11 Aug 2022 09:06:21 +0800 Subject: [PATCH 56/59] chore: rename datetime functions for scalars --- .../scalars/arithmetics/arithmetic_minus.rs | 2 +- .../scalars/arithmetics/arithmetic_plus.rs | 2 +- .../tests/it/plan_expression_monotonicity.rs | 28 +++++++++---------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/common/functions/src/scalars/arithmetics/arithmetic_minus.rs b/common/functions/src/scalars/arithmetics/arithmetic_minus.rs index 60ece4bda6f34..ac23ad98c0e7d 100644 --- a/common/functions/src/scalars/arithmetics/arithmetic_minus.rs +++ b/common/functions/src/scalars/arithmetics/arithmetic_minus.rs @@ -73,7 +73,7 @@ impl ArithmeticMinusFunction { if right_type.is_interval() { let interval: IntervalType = args[1].to_owned().try_into()?; let kind = interval.kind(); - let function_name = format!("subtract{}s", kind); + let function_name = format!("subtract_{}s", kind); return FunctionFactory::instance().get(function_name, &[args[0], &Int64Type::new_impl()]); } with_match_date_type_error!(right_type, |$D| { diff --git a/common/functions/src/scalars/arithmetics/arithmetic_plus.rs b/common/functions/src/scalars/arithmetics/arithmetic_plus.rs index c0adf7289cb07..174f1dc7a5fcf 100644 --- a/common/functions/src/scalars/arithmetics/arithmetic_plus.rs +++ b/common/functions/src/scalars/arithmetics/arithmetic_plus.rs @@ -80,7 +80,7 @@ impl ArithmeticPlusFunction { if right_type.is_interval() { let interval: IntervalType = args[1].to_owned().try_into()?; let kind = interval.kind(); - let function_name = format!("add{}s", kind); + let function_name = format!("add_{}s", kind); FunctionFactory::instance().get(function_name, &[args[0], &Int64Type::new_impl()]) } else { error_fn() diff --git a/common/planners/tests/it/plan_expression_monotonicity.rs 
b/common/planners/tests/it/plan_expression_monotonicity.rs index f7e7dc71b1400..3dde8c340ffb6 100644 --- a/common/planners/tests/it/plan_expression_monotonicity.rs +++ b/common/planners/tests/it/plan_expression_monotonicity.rs @@ -456,8 +456,8 @@ fn test_abs_function() -> Result<()> { fn test_dates_function() -> Result<()> { let test_suite = vec![ Test { - name: "f(x) = toStartOfWeek(z+12)", - expr: Expression::create_scalar_function("toStartOfWeek", vec![add( + name: "f(x) = to_start_of_week(z+12)", + expr: Expression::create_scalar_function("to_start_of_week", vec![add( col("z"), lit(12i32), )]), @@ -473,8 +473,8 @@ fn test_dates_function() -> Result<()> { }, }, Test { - name: "f(x) = toMonday(x)", - expr: Expression::create_scalar_function("toMonday", vec![col("x")]), + name: "f(x) = to_monday(x)", + expr: Expression::create_scalar_function("to_monday", vec![col("x")]), column: "x", left: None, right: None, @@ -487,17 +487,17 @@ fn test_dates_function() -> Result<()> { }, }, Test { - // Function 'toSecond' is not monotonic in the variables range. - name: "f(x) = toSecond(x)", - expr: Expression::create_scalar_function("toSecond", vec![col("x")]), + // Function 'to_second' is not monotonic in the variables range. + name: "f(x) = to_second(x)", + expr: Expression::create_scalar_function("to_second", vec![col("x")]), column: "x", left: None, right: None, expect_mono: Monotonicity::default(), }, Test { - name: "f(z) = toSecond(z)", - expr: Expression::create_scalar_function("toSecond", vec![col("z")]), + name: "f(z) = to_second(z)", + expr: Expression::create_scalar_function("to_second", vec![col("z")]), column: "z", left: create_datetime(1638288000000000), right: create_datetime(1638288059000000), @@ -510,17 +510,17 @@ fn test_dates_function() -> Result<()> { }, }, Test { - // Function 'toDayOfYear' is not monotonic in the variables range. - name: "f(z) = toDayOfYear(z)", - expr: Expression::create_scalar_function("toDayOfYear", vec![col("z")]), + // Function 'to_day_of_year' is not monotonic in the variables range. 
+ name: "f(z) = to_day_of_year(z)", + expr: Expression::create_scalar_function("to_day_of_year", vec![col("z")]), column: "z", left: create_datetime(1606752119000000), right: create_datetime(1638288059000000), expect_mono: Monotonicity::default(), }, Test { - name: "f(z) = toStartOfHour(z)", - expr: Expression::create_scalar_function("toStartOfHour", vec![col("z")]), + name: "f(z) = to_start_of_hour(z)", + expr: Expression::create_scalar_function("to_start_of_hour", vec![col("z")]), column: "z", left: None, right: None, From 3aaca0c5224da32068c0345ac2747e4d32d9ee65 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 11 Aug 2022 09:14:42 +0800 Subject: [PATCH 57/59] feat(tests): fix tests --- tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh b/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh index 72ec999ba1e39..79a8f8ff770b4 100755 --- a/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh +++ b/tests/suites/1_stateful/04_mini_dataset/04_0000_mini_ontime.sh @@ -23,6 +23,9 @@ ontime_statements=( "SELECT OriginCityName, DestCityName, count(*) AS c FROM ontime_mini GROUP BY OriginCityName, DestCityName ORDER BY c DESC LIMIT 10;" ) +for i in "${ontime_statements[@]}"; do + echo "$i" | $MYSQL_CLIENT_CONNECT +done ## Clean table echo "drop table if exists ontime_mini all;" | $MYSQL_CLIENT_CONNECT From 42a9a5e5f8924be4b9f820121d00f50441f65262 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 11 Aug 2022 09:21:13 +0800 Subject: [PATCH 58/59] feat(tests): fix lint --- common/expression/src/values.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/common/expression/src/values.rs b/common/expression/src/values.rs index 3ba896341c76b..9b8f42f035831 100755 --- a/common/expression/src/values.rs +++ b/common/expression/src/values.rs @@ -708,9 +708,8 @@ impl Column { Column::Nullable(col) => { let arrow_array = col.column.as_arrow(); match arrow_array.data_type() { - ArrowType::Null | ArrowType::Extension(_, t, _) if **t == ArrowType::Null => { - arrow_array - } + ArrowType::Null => arrow_array, + ArrowType::Extension(_, t, _) if **t == ArrowType::Null => arrow_array, _ => arrow_array.with_validity(Some(col.validity.clone())), } } From c285cdcae214c6b21585d4e57a4ed613157cec86 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 11 Aug 2022 09:26:13 +0800 Subject: [PATCH 59/59] feat(tests): rename to from_data_with_validity --- common/expression/src/column_from.rs | 2 +- common/expression/tests/it/expression.rs | 64 ++++++++++++------------ common/expression/tests/it/kernel.rs | 22 ++++---- 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/common/expression/src/column_from.rs b/common/expression/src/column_from.rs index 1b55a8f41cf32..e899bd6a6af63 100755 --- a/common/expression/src/column_from.rs +++ b/common/expression/src/column_from.rs @@ -24,7 +24,7 @@ pub trait ColumnFrom { /// Initialize by name and values. 
fn from_data(_: D) -> Column; - fn from_data_valids(d: D, valids: Vec) -> Column { + fn from_data_with_validity(d: D, valids: Vec) -> Column { let column = Self::from_data(d); Column::Nullable(Box::new(NullableColumn { column, diff --git a/common/expression/tests/it/expression.rs b/common/expression/tests/it/expression.rs index a1e07440e8343..28278d0f3083e 100644 --- a/common/expression/tests/it/expression.rs +++ b/common/expression/tests/it/expression.rs @@ -62,25 +62,25 @@ pub fn test_pass() { run_ast(&mut file, "plus(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "plus(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![1u8, 2, 3], vec![false, true, true]), + Column::from_data_with_validity(vec![1u8, 2, 3], vec![false, true, true]), ), ]); run_ast(&mut file, "plus(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -88,19 +88,19 @@ pub fn test_pass() { run_ast(&mut file, "minus(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "minus(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt16)), - Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u16, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int16)), - Column::from_data_valids(vec![1i16, 2, 3], vec![false, true, true]), + Column::from_data_with_validity(vec![1i16, 2, 3], vec![false, true, true]), ), ]); @@ -108,7 +108,7 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u16, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -116,19 +116,19 @@ pub fn test_pass() { run_ast(&mut file, "multiply(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "multiply(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt16)), - Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u16, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int16)), - Column::from_data_valids(vec![1i16, 2, 3], vec![false, true, true]), + Column::from_data_with_validity(vec![1i16, 2, 3], vec![false, true, true]), ), ]); @@ -136,12 +136,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt32)), - Column::from_data_valids(vec![10u32, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u32, 11, 
12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int32)), - Column::from_data_valids(vec![1i32, 2, 3], vec![false, true, true]), + Column::from_data_with_validity(vec![1i32, 2, 3], vec![false, true, true]), ), ]); @@ -149,7 +149,7 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -157,19 +157,19 @@ pub fn test_pass() { run_ast(&mut file, "divide(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "divide(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt16)), - Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u16, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int16)), - Column::from_data_valids(vec![1i16, 2, 3], vec![false, true, true]), + Column::from_data_with_validity(vec![1i16, 2, 3], vec![false, true, true]), ), ]); @@ -177,7 +177,7 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -185,19 +185,19 @@ pub fn test_pass() { run_ast(&mut file, "avg(a, 10)", &[( "a", DataType::Nullable(Box::new(DataType::UInt8)), - Column::from_data_valids(vec![10u8, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u8, 11, 12], vec![false, true, false]), )]); run_ast(&mut file, "avg(a, b)", &[ ( "a", DataType::Nullable(Box::new(DataType::UInt16)), - Column::from_data_valids(vec![10u16, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u16, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int16)), - Column::from_data_valids(vec![1i16, 2, 3], vec![false, true, true]), + Column::from_data_with_validity(vec![1i16, 2, 3], vec![false, true, true]), ), ]); @@ -205,12 +205,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::UInt32)), - Column::from_data_valids(vec![10u32, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10u32, 11, 12], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int32)), - Column::from_data_valids(vec![1i32, 2, 3], vec![false, true, true]), + Column::from_data_with_validity(vec![1i32, 2, 3], vec![false, true, true]), ), ]); @@ -218,12 +218,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::Float32)), - Column::from_data_valids(vec![10f32, 11f32, 12f32], vec![false, true, false]), + Column::from_data_with_validity(vec![10f32, 11f32, 12f32], vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Int32)), - Column::from_data_valids(vec![1i32, 2, 3], vec![false, true, true]), + Column::from_data_with_validity(vec![1i32, 2, 3], vec![false, true, true]), ), ]); @@ -231,12 +231,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::Float32)), - Column::from_data_valids(vec![10f32, 11f32, 12f32], vec![false, true, false]), + Column::from_data_with_validity(vec![10f32, 11f32, 12f32], 
vec![false, true, false]), ), ( "b", DataType::Nullable(Box::new(DataType::Float64)), - Column::from_data_valids(vec![1f64, 2f64, 3f64], vec![false, true, true]), + Column::from_data_with_validity(vec![1f64, 2f64, 3f64], vec![false, true, true]), ), ]); @@ -244,7 +244,7 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::Int8)), - Column::from_data_valids(vec![10, 11, 12], vec![false, true, false]), + Column::from_data_with_validity(vec![10, 11, 12], vec![false, true, false]), ), ("b", DataType::Null, Column::Null { len: 3 }), ]); @@ -252,7 +252,7 @@ pub fn test_pass() { run_ast(&mut file, "NOT a", &[( "a", DataType::Nullable(Box::new(DataType::Boolean)), - Column::from_data_valids(vec![true, false, true], vec![false, true, false]), + Column::from_data_with_validity(vec![true, false, true], vec![false, true, false]), )]); run_ast(&mut file, "NOT a", &[("a", DataType::Null, Column::Null { @@ -269,7 +269,7 @@ pub fn test_pass() { ( "b", DataType::Nullable(Box::new(DataType::String)), - Column::from_data_valids(vec!["a", "b", "c", "d", "e"], vec![ + Column::from_data_with_validity(vec!["a", "b", "c", "d", "e"], vec![ true, true, false, false, false, ]), ), @@ -278,12 +278,12 @@ pub fn test_pass() { ( "a", DataType::Nullable(Box::new(DataType::Boolean)), - Column::from_data_valids(vec![false; 5], vec![true, true, false, false, false]), + Column::from_data_with_validity(vec![false; 5], vec![true, true, false, false, false]), ), ( "b", DataType::Nullable(Box::new(DataType::String)), - Column::from_data_valids(vec!["a", "b", "c", "d", "e"], vec![ + Column::from_data_with_validity(vec!["a", "b", "c", "d", "e"], vec![ true, true, false, false, false, ]), ), diff --git a/common/expression/tests/it/kernel.rs b/common/expression/tests/it/kernel.rs index daf2aa99358b2..1b524eb7d53dd 100644 --- a/common/expression/tests/it/kernel.rs +++ b/common/expression/tests/it/kernel.rs @@ -32,11 +32,11 @@ pub fn test_pass() { Column::Boolean(vec![true, false, false, false, true].into()), &[ Column::Int32(vec![0, 1, 2, 3, -4].into()), - Column::from_data_valids(vec![10u8, 11, 12, 13, 14], vec![ + Column::from_data_with_validity(vec![10u8, 11, 12, 13, 14], vec![ false, true, false, false, false, ]), Column::Null { len: 5 }, - Column::from_data_valids(vec!["a", "b", "c", "d", "e"], vec![ + Column::from_data_with_validity(vec!["a", "b", "c", "d", "e"], vec![ true, true, false, false, false, ]), ], @@ -44,16 +44,16 @@ pub fn test_pass() { run_filter( &mut file, - Column::from_data_valids(vec![true, true, false, true, true], vec![ + Column::from_data_with_validity(vec![true, true, false, true, true], vec![ false, true, true, false, false, ]), &[ Column::Int32(vec![0, 1, 2, 3, -4].into()), - Column::from_data_valids(vec![10u8, 11, 12, 13, 14], vec![ + Column::from_data_with_validity(vec![10u8, 11, 12, 13, 14], vec![ false, true, false, false, false, ]), Column::Null { len: 5 }, - Column::from_data_valids(vec!["x", "y", "z", "a", "b"], vec![ + Column::from_data_with_validity(vec!["x", "y", "z", "a", "b"], vec![ false, true, true, false, false, ]), ], @@ -62,21 +62,21 @@ pub fn test_pass() { run_concat(&mut file, vec![ vec![ Column::Int32(vec![0, 1, 2, 3, -4].into()), - Column::from_data_valids(vec![10u8, 11, 12, 13, 14], vec![ + Column::from_data_with_validity(vec![10u8, 11, 12, 13, 14], vec![ false, true, false, false, false, ]), Column::Null { len: 5 }, Column::EmptyArray { len: 5 }, - Column::from_data_valids(vec!["x", "y", "z", "a", "b"], vec![ + Column::from_data_with_validity(vec!["x", "y", "z", 
"a", "b"], vec![ false, true, true, false, false, ]), ], vec![ Column::Int32(vec![5, 6].into()), - Column::from_data_valids(vec![15u8, 16], vec![false, true]), + Column::from_data_with_validity(vec![15u8, 16], vec![false, true]), Column::Null { len: 2 }, Column::EmptyArray { len: 2 }, - Column::from_data_valids(vec!["x", "y"], vec![false, true]), + Column::from_data_with_validity(vec!["x", "y"], vec![false, true]), ], ]); @@ -100,11 +100,11 @@ pub fn test_pass() { &mut file, &[ Column::Int32(vec![0, 1, 2, 3, -4].into()), - Column::from_data_valids(vec![10, 11, 12, 13, 14], vec![ + Column::from_data_with_validity(vec![10, 11, 12, 13, 14], vec![ false, true, false, false, false, ]), Column::Null { len: 5 }, - Column::from_data_valids(vec!["x", "y", "z", "a", "b"], vec![ + Column::from_data_with_validity(vec!["x", "y", "z", "a", "b"], vec![ false, true, true, false, false, ]), ],