diff --git a/Cargo.lock b/Cargo.lock index 02541f12842c4..f2af2b045029a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8314,6 +8314,7 @@ version = "0.1.0" dependencies = [ "anyerror", "cbordata", + "common-arrow", "common-exception", "common-expression", "common-functions", diff --git a/src/query/expression/src/function.rs b/src/query/expression/src/function.rs index d82185aba5c0f..38a18c499aac7 100755 --- a/src/query/expression/src/function.rs +++ b/src/query/expression/src/function.rs @@ -399,6 +399,13 @@ impl FunctionID { FunctionID::Factory { id, .. } => *id, } } + + pub fn name(&self) -> &String { + match self { + FunctionID::Builtin { name, .. } => name, + FunctionID::Factory { name, .. } => name, + } + } } pub fn wrap_nullable(f: F) -> impl Fn(&[ValueRef], &mut EvalContext) -> Value diff --git a/src/query/expression/src/kernels/concat.rs b/src/query/expression/src/kernels/concat.rs index b0dba4e548352..8bd6b36bad841 100644 --- a/src/query/expression/src/kernels/concat.rs +++ b/src/query/expression/src/kernels/concat.rs @@ -18,6 +18,7 @@ use itertools::Itertools; use crate::types::array::ArrayColumnBuilder; use crate::types::decimal::DecimalColumn; +use crate::types::map::KvColumnBuilder; use crate::types::nullable::NullableColumn; use crate::types::number::NumberColumn; use crate::types::string::StringColumnBuilder; @@ -28,6 +29,7 @@ use crate::types::BooleanType; use crate::types::DateType; use crate::types::EmptyArrayType; use crate::types::EmptyMapType; +use crate::types::MapType; use crate::types::NullType; use crate::types::NullableType; use crate::types::NumberType; @@ -138,7 +140,7 @@ impl Column { let builder = Vec::with_capacity(capacity); Self::concat_value_types::(builder, columns) } - Column::Array(col) | Column::Map(col) => { + Column::Array(col) => { let mut offsets = Vec::with_capacity(capacity + 1); offsets.push(0); let builder = ColumnBuilder::from_column( @@ -148,6 +150,24 @@ impl Column { let builder = ArrayColumnBuilder { builder, offsets }; Self::concat_value_types::>(builder, columns) } + Column::Map(col) => { + let mut offsets = Vec::with_capacity(capacity + 1); + offsets.push(0); + let builder = ColumnBuilder::from_column( + TypeDeserializerImpl::with_capacity(&col.values.data_type(), capacity) + .finish_to_column(), + ); + let (key_builder, val_builder) = match builder { + ColumnBuilder::Tuple { fields, .. } => (fields[0].clone(), fields[1].clone()), + _ => unreachable!(), + }; + let builder = KvColumnBuilder { + keys: key_builder, + values: val_builder, + }; + let builder = ArrayColumnBuilder { builder, offsets }; + Self::concat_value_types::>(builder, columns) + } Column::Nullable(_) => { let mut bitmaps = Vec::with_capacity(columns.len()); let mut inners = Vec::with_capacity(columns.len()); diff --git a/src/query/expression/src/kernels/filter.rs b/src/query/expression/src/kernels/filter.rs index 5238838687d1c..3e50aadb7c30b 100644 --- a/src/query/expression/src/kernels/filter.rs +++ b/src/query/expression/src/kernels/filter.rs @@ -19,8 +19,10 @@ use common_arrow::arrow::bitmap::MutableBitmap; use common_arrow::arrow::buffer::Buffer; use common_exception::Result; +use crate::types::array::ArrayColumn; use crate::types::array::ArrayColumnBuilder; use crate::types::decimal::DecimalColumn; +use crate::types::map::KvColumnBuilder; use crate::types::nullable::NullableColumn; use crate::types::number::NumberColumn; use crate::types::string::StringColumn; @@ -28,6 +30,7 @@ use crate::types::string::StringColumnBuilder; use crate::types::AnyType; use crate::types::ArrayType; use crate::types::BooleanType; +use crate::types::MapType; use crate::types::ValueType; use crate::types::VariantType; use crate::with_decimal_type; @@ -133,7 +136,7 @@ impl Column { let d = Self::filter_primitive_types(column, filter); Column::Date(d) } - Column::Array(column) | Column::Map(column) => { + Column::Array(column) => { let mut offsets = Vec::with_capacity(length + 1); offsets.push(0); let builder = ColumnBuilder::from_column( @@ -143,6 +146,25 @@ impl Column { let builder = ArrayColumnBuilder { builder, offsets }; Self::filter_scalar_types::>(column, builder, filter) } + Column::Map(column) => { + let mut offsets = Vec::with_capacity(length + 1); + offsets.push(0); + let builder = ColumnBuilder::from_column( + TypeDeserializerImpl::with_capacity(&column.values.data_type(), length) + .finish_to_column(), + ); + let (key_builder, val_builder) = match builder { + ColumnBuilder::Tuple { fields, .. } => (fields[0].clone(), fields[1].clone()), + _ => unreachable!(), + }; + let builder = KvColumnBuilder { + keys: key_builder, + values: val_builder, + }; + let builder = ArrayColumnBuilder { builder, offsets }; + let column = ArrayColumn::try_downcast(column).unwrap(); + Self::filter_scalar_types::>(&column, builder, filter) + } Column::Nullable(c) => { let column = Self::filter(&c.column, filter); let validity = Self::filter_scalar_types::( diff --git a/src/query/expression/src/kernels/scatter.rs b/src/query/expression/src/kernels/scatter.rs index 37f805a9b6223..0791fc1dcae14 100644 --- a/src/query/expression/src/kernels/scatter.rs +++ b/src/query/expression/src/kernels/scatter.rs @@ -16,8 +16,10 @@ use common_arrow::arrow::bitmap::MutableBitmap; use common_exception::Result; use itertools::Itertools; +use crate::types::array::ArrayColumn; use crate::types::array::ArrayColumnBuilder; use crate::types::decimal::DecimalColumn; +use crate::types::map::KvColumnBuilder; use crate::types::nullable::NullableColumn; use crate::types::number::NumberColumn; use crate::types::string::StringColumnBuilder; @@ -26,6 +28,7 @@ use crate::types::ArrayType; use crate::types::BooleanType; use crate::types::DataType; use crate::types::DateType; +use crate::types::MapType; use crate::types::NumberType; use crate::types::StringType; use crate::types::TimestampType; @@ -191,7 +194,7 @@ impl Column { indices, scatter_size, ), - Column::Array(column) | Column::Map(column) => { + Column::Array(column) => { let mut offsets = Vec::with_capacity(length + 1); offsets.push(0); let builder = ColumnBuilder::from_column( @@ -206,6 +209,30 @@ impl Column { scatter_size, ) } + Column::Map(column) => { + let mut offsets = Vec::with_capacity(length + 1); + offsets.push(0); + let builder = ColumnBuilder::from_column( + TypeDeserializerImpl::with_capacity(&column.values.data_type(), length) + .finish_to_column(), + ); + let (key_builder, val_builder) = match builder { + ColumnBuilder::Tuple { fields, .. } => (fields[0].clone(), fields[1].clone()), + _ => unreachable!(), + }; + let builder = KvColumnBuilder { + keys: key_builder, + values: val_builder, + }; + let builder = ArrayColumnBuilder { builder, offsets }; + let column = ArrayColumn::try_downcast(column).unwrap(); + Self::scatter_scalars::, _>( + &column, + builder, + indices, + scatter_size, + ) + } Column::Nullable(c) => { let columns = c.column.scatter(data_type, indices, scatter_size); let validitys = Self::scatter_scalars::( diff --git a/src/query/expression/src/kernels/take.rs b/src/query/expression/src/kernels/take.rs index 49c8e33d50794..7550d9d2c7600 100644 --- a/src/query/expression/src/kernels/take.rs +++ b/src/query/expression/src/kernels/take.rs @@ -15,14 +15,17 @@ use common_exception::Result; use itertools::Itertools; +use crate::types::array::ArrayColumn; use crate::types::array::ArrayColumnBuilder; use crate::types::decimal::DecimalColumn; +use crate::types::map::KvColumnBuilder; use crate::types::nullable::NullableColumn; use crate::types::number::NumberColumn; use crate::types::AnyType; use crate::types::ArgType; use crate::types::ArrayType; use crate::types::BooleanType; +use crate::types::MapType; use crate::types::NumberType; use crate::types::StringType; use crate::types::ValueType; @@ -102,7 +105,7 @@ impl Column { .unwrap(); Column::Date(d) } - Column::Array(column) | Column::Map(column) => { + Column::Array(column) => { let mut offsets = Vec::with_capacity(length + 1); offsets.push(0); let builder = ColumnBuilder::from_column( @@ -112,6 +115,25 @@ impl Column { let builder = ArrayColumnBuilder { builder, offsets }; Self::take_value_types::, _>(column, builder, indices) } + Column::Map(column) => { + let mut offsets = Vec::with_capacity(length + 1); + offsets.push(0); + let builder = ColumnBuilder::from_column( + TypeDeserializerImpl::with_capacity(&column.values.data_type(), self.len()) + .finish_to_column(), + ); + let (key_builder, val_builder) = match builder { + ColumnBuilder::Tuple { fields, .. } => (fields[0].clone(), fields[1].clone()), + _ => unreachable!(), + }; + let builder = KvColumnBuilder { + keys: key_builder, + values: val_builder, + }; + let builder = ArrayColumnBuilder { builder, offsets }; + let column = ArrayColumn::try_downcast(column).unwrap(); + Self::take_value_types::, _>(&column, builder, indices) + } Column::Nullable(c) => { let column = c.column.take(indices); let validity = Self::take_arg_types::(&c.validity, indices); diff --git a/src/query/expression/src/kernels/take_chunks.rs b/src/query/expression/src/kernels/take_chunks.rs index 77525b3ba120c..bebbdeb3e093b 100644 --- a/src/query/expression/src/kernels/take_chunks.rs +++ b/src/query/expression/src/kernels/take_chunks.rs @@ -17,6 +17,7 @@ use itertools::Itertools; use crate::types::array::ArrayColumnBuilder; use crate::types::decimal::DecimalColumn; +use crate::types::map::KvColumnBuilder; use crate::types::nullable::NullableColumn; use crate::types::number::NumberColumn; use crate::types::AnyType; @@ -25,6 +26,7 @@ use crate::types::ArrayType; use crate::types::BooleanType; use crate::types::DataType; use crate::types::DateType; +use crate::types::MapType; use crate::types::NumberType; use crate::types::StringType; use crate::types::TimestampType; @@ -237,7 +239,7 @@ impl Column { let builder = DateType::create_builder(result_size, &[]); Self::take_block_value_types::(columns, builder, indices) } - Column::Array(column) | Column::Map(column) => { + Column::Array(column) => { let mut offsets = Vec::with_capacity(result_size + 1); offsets.push(0); let builder = ColumnBuilder::from_column( @@ -247,6 +249,24 @@ impl Column { let builder = ArrayColumnBuilder { builder, offsets }; Self::take_block_value_types::>(columns, builder, indices) } + Column::Map(column) => { + let mut offsets = Vec::with_capacity(result_size + 1); + offsets.push(0); + let builder = ColumnBuilder::from_column( + TypeDeserializerImpl::with_capacity(&column.values.data_type(), result_size) + .finish_to_column(), + ); + let (key_builder, val_builder) = match builder { + ColumnBuilder::Tuple { fields, .. } => (fields[0].clone(), fields[1].clone()), + _ => unreachable!(), + }; + let builder = KvColumnBuilder { + keys: key_builder, + values: val_builder, + }; + let builder = ArrayColumnBuilder { builder, offsets }; + Self::take_block_value_types::>(columns, builder, indices) + } Column::Nullable(_) => { let inner_ty = datatype.as_nullable().unwrap(); let inner_columns = columns diff --git a/src/query/storages/common/index/Cargo.toml b/src/query/storages/common/index/Cargo.toml index 66ae73276978e..3c02ec511f877 100644 --- a/src/query/storages/common/index/Cargo.toml +++ b/src/query/storages/common/index/Cargo.toml @@ -32,6 +32,7 @@ xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", features ], tag = "databend-alpha.4" } [dev-dependencies] +common-arrow = { path = "../../../../common/arrow" } criterion = "0.4" rand = "0.8.5" diff --git a/src/query/storages/common/index/src/bloom_index.rs b/src/query/storages/common/index/src/bloom_index.rs index 99defa927c671..8cc05558c33da 100644 --- a/src/query/storages/common/index/src/bloom_index.rs +++ b/src/query/storages/common/index/src/bloom_index.rs @@ -21,7 +21,9 @@ use common_exception::Result; use common_exception::Span; use common_expression::converts::scalar_to_datavalue; use common_expression::eval_function; +use common_expression::types::AnyType; use common_expression::types::DataType; +use common_expression::types::MapType; use common_expression::types::NullableType; use common_expression::types::Number; use common_expression::types::NumberDataType; @@ -144,19 +146,48 @@ impl BloomIndex { let mut columns = Vec::new(); for i in 0..num_columns { let data_type = &data_blocks_tobe_indexed[0].get_by_offset(i).data_type; - if Xor8Filter::supported_type(data_type) { - fields.push(source_schema.field(i)); - - let source_columns = data_blocks_tobe_indexed - .iter() - .map(|block| { - let value = &block.get_by_offset(i).value; - value.convert_to_full_column(data_type, block.num_rows()) - }) - .collect::>(); - let column = Column::concat(&source_columns); - columns.push((column, data_type.clone())); - } + match data_type { + DataType::Map(box inner_ty) => { + // Add bloom filter for the value of map type + let val_type = match inner_ty { + DataType::Tuple(kv_tys) => kv_tys[1].clone(), + _ => unreachable!(), + }; + if Xor8Filter::supported_type(&val_type) { + fields.push(source_schema.field(i)); + + let source_columns = data_blocks_tobe_indexed + .iter() + .map(|block| { + let value = &block.get_by_offset(i).value; + let column = + value.convert_to_full_column(data_type, block.num_rows()); + let map_column = + MapType::::try_downcast_column(&column) + .unwrap(); + map_column.values.values + }) + .collect::>(); + let column = Column::concat(&source_columns); + columns.push((column, val_type)); + } + } + _ => { + if Xor8Filter::supported_type(data_type) { + fields.push(source_schema.field(i)); + + let source_columns = data_blocks_tobe_indexed + .iter() + .map(|block| { + let value = &block.get_by_offset(i).value; + value.convert_to_full_column(data_type, block.num_rows()) + }) + .collect::>(); + let column = Column::concat(&source_columns); + columns.push((column, data_type.clone())); + } + } + }; } if columns.is_empty() { return Ok(None); @@ -203,8 +234,13 @@ impl BloomIndex { let filter = filter_builder.build()?; if let Some(len) = filter.len() { - let idx = source_schema.index_of(field.name().as_str()).unwrap(); - column_distinct_count.insert(idx, len); + match field.data_type() { + TableDataType::Map(_) => {} + _ => { + let idx = source_schema.index_of(field.name().as_str()).unwrap(); + column_distinct_count.insert(idx, len); + } + } } let filter_name = Self::build_filter_column_name(version, field)?; @@ -386,7 +422,8 @@ fn visit_expr_column_eq_constant( expr: &mut Expr, visitor: &mut impl FnMut(Span, &str, &Scalar, &DataType, &DataType) -> Result>>, ) -> Result<()> { - // Find patterns like `Column = ` or ` = Column`. + // Find patterns like `Column = `, ` = Column`, + // or `MapColumn[] = `, ` = MapColumn[]` match expr { Expr::FunctionCall { span, @@ -409,6 +446,48 @@ fn visit_expr_column_eq_constant( return Ok(()); } } + [ + Expr::FunctionCall { id, args, .. }, + Expr::Constant { scalar, .. }, + ] + | [ + Expr::Constant { scalar, .. }, + Expr::FunctionCall { id, args, .. }, + ] => { + if id.name() == "get" { + if let Some(new_expr) = + visit_map_column(*span, args, scalar, return_type, visitor)? + { + *expr = new_expr; + return Ok(()); + } + } + } + [ + Expr::FunctionCall { id, args, .. }, + Expr::Cast { + expr: box cast_expr, + .. + }, + ] + | [ + Expr::Cast { + expr: box cast_expr, + .. + }, + Expr::FunctionCall { id, args, .. }, + ] => { + if let Expr::Constant { scalar, .. } = cast_expr { + if id.name() == "get" { + if let Some(new_expr) = + visit_map_column(*span, args, scalar, return_type, visitor)? + { + *expr = new_expr; + return Ok(()); + } + } + } + } _ => (), }, _ => (), @@ -429,3 +508,22 @@ fn visit_expr_column_eq_constant( Ok(()) } + +fn visit_map_column( + span: Span, + args: &[Expr], + scalar: &Scalar, + return_type: &DataType, + visitor: &mut impl FnMut(Span, &str, &Scalar, &DataType, &DataType) -> Result>>, +) -> Result>> { + if let Expr::ColumnRef { id, data_type, .. } = &args[0] { + if let DataType::Map(box inner_ty) = data_type.remove_nullable() { + let val_type = match inner_ty { + DataType::Tuple(kv_tys) => kv_tys[1].clone(), + _ => unreachable!(), + }; + return visitor(span, id, scalar, &val_type, return_type); + } + } + Ok(None) +} diff --git a/src/query/storages/common/index/src/lib.rs b/src/query/storages/common/index/src/lib.rs index 8669ebde14d39..50a673bf9b6bc 100644 --- a/src/query/storages/common/index/src/lib.rs +++ b/src/query/storages/common/index/src/lib.rs @@ -13,6 +13,7 @@ // limitations under the License. #![allow(clippy::uninlined_format_args)] +#![feature(box_patterns)] mod bloom_index; pub mod filters; diff --git a/src/query/storages/common/index/tests/it/filters/bloom_filter.rs b/src/query/storages/common/index/tests/it/filters/bloom_filter.rs index 6795d3352bc29..5508a83d5403c 100644 --- a/src/query/storages/common/index/tests/it/filters/bloom_filter.rs +++ b/src/query/storages/common/index/tests/it/filters/bloom_filter.rs @@ -16,14 +16,20 @@ use std::collections::HashMap; use std::sync::Arc; +use common_arrow::arrow::buffer::Buffer; use common_exception::Result; use common_expression::type_check::check_function; +use common_expression::types::array::ArrayColumn; +use common_expression::types::map::KvColumn; +use common_expression::types::map::KvPair; use common_expression::types::number::NumberScalar; use common_expression::types::number::UInt8Type; +use common_expression::types::AnyType; use common_expression::types::DataType; use common_expression::types::NumberDataType; use common_expression::types::StringType; use common_expression::BlockEntry; +use common_expression::Column; use common_expression::DataBlock; use common_expression::Expr; use common_expression::FromData; @@ -44,7 +50,24 @@ fn test_bloom_filter() -> Result<()> { let schema = Arc::new(TableSchema::new(vec![ TableField::new("0", TableDataType::Number(NumberDataType::UInt8)), TableField::new("1", TableDataType::String), + TableField::new( + "2", + TableDataType::Map(Box::new(TableDataType::Tuple { + fields_name: vec!["key".to_string(), "value".to_string()], + fields_type: vec![ + TableDataType::Number(NumberDataType::UInt8), + TableDataType::String, + ], + })), + ), ])); + + let kv_ty = DataType::Tuple(vec![ + DataType::Number(NumberDataType::UInt8), + DataType::String, + ]); + let map_ty = DataType::Map(Box::new(kv_ty)); + let blocks = vec![ DataBlock::new( vec![ @@ -56,12 +79,32 @@ fn test_bloom_filter() -> Result<()> { data_type: DataType::String, value: Value::Scalar(Scalar::String(b"a".to_vec())), }, + BlockEntry { + data_type: map_ty.clone(), + value: Value::Scalar(Scalar::Map(Column::Tuple { + fields: vec![ + UInt8Type::from_data(vec![1, 2]), + StringType::from_data(vec!["a", "b"]), + ], + len: 2, + })), + }, ], 2, ), DataBlock::new_from_columns(vec![ UInt8Type::from_data(vec![2, 3]), StringType::from_data(vec!["b", "c"]), + Column::Map(Box::new( + ArrayColumn::> { + values: KvColumn { + keys: UInt8Type::from_data(vec![1, 2, 3]), + values: StringType::from_data(vec!["b", "c", "d"]), + }, + offsets: Buffer::::from(vec![0, 2, 3]), + } + .upcast(), + )), ]), ]; let blocks_ref = blocks.iter().collect::>(); @@ -114,6 +157,42 @@ fn test_bloom_filter() -> Result<()> { eval_index(&index, "1", Scalar::String(b"d".to_vec()), DataType::String) ); + assert_eq!( + FilterEvalResult::Uncertain, + eval_map_index( + &index, + "2", + map_ty.clone(), + Scalar::Number(NumberScalar::UInt8(1)), + DataType::Number(NumberDataType::UInt8), + Scalar::String(b"a".to_vec()), + DataType::String + ) + ); + assert_eq!( + FilterEvalResult::Uncertain, + eval_map_index( + &index, + "2", + map_ty.clone(), + Scalar::Number(NumberScalar::UInt8(2)), + DataType::Number(NumberDataType::UInt8), + Scalar::String(b"b".to_vec()), + DataType::String + ) + ); + assert_eq!( + FilterEvalResult::MustFalse, + eval_map_index( + &index, + "2", + map_ty, + Scalar::Number(NumberScalar::UInt8(3)), + DataType::Number(NumberDataType::UInt8), + Scalar::String(b"x".to_vec()), + DataType::String + ) + ); Ok(()) } @@ -152,3 +231,59 @@ fn eval_index(index: &BloomIndex, col_name: &str, val: Scalar, ty: DataType) -> index.apply(expr, &scalar_map).unwrap() } + +fn eval_map_index( + index: &BloomIndex, + col_name: &str, + map_ty: DataType, + key: Scalar, + key_ty: DataType, + val: Scalar, + ty: DataType, +) -> FilterEvalResult { + let get_expr = check_function( + None, + "get", + &[], + &[ + Expr::ColumnRef { + span: None, + id: col_name.to_string(), + data_type: map_ty, + display_name: col_name.to_string(), + }, + Expr::Constant { + span: None, + scalar: key, + data_type: key_ty, + }, + ], + &BUILTIN_FUNCTIONS, + ) + .unwrap(); + let expr = check_function( + None, + "eq", + &[], + &[get_expr, Expr::Constant { + span: None, + scalar: val, + data_type: ty, + }], + &BUILTIN_FUNCTIONS, + ) + .unwrap(); + + let point_query_cols = BloomIndex::find_eq_columns(&expr).unwrap(); + + let mut scalar_map = HashMap::::new(); + let func_ctx = FunctionContext::default(); + for (_, scalar, ty) in point_query_cols.iter() { + if !scalar_map.contains_key(scalar) { + let digest = BloomIndex::calculate_scalar_digest(func_ctx, scalar, ty).unwrap(); + scalar_map.insert(scalar.clone(), digest); + } + } + + index.apply(expr, &scalar_map).unwrap() +} diff --git a/tests/sqllogictests/suites/base/03_common/03_0037_insert_into_map b/tests/sqllogictests/suites/base/03_common/03_0037_insert_into_map index c3874b965e6bd..fb4f1826411e7 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0037_insert_into_map +++ b/tests/sqllogictests/suites/base/03_common/03_0037_insert_into_map @@ -27,6 +27,11 @@ abc def NULL NULL NULL NULL mn NULL NULL NULL NULL NULL +query IT +select * from t1 where m[300] = 'mn' +---- +2 {300:'mn'} + statement error 1001 INSERT INTO t1 (id, m) VALUES(1, {100:'k1',100:'k2'}) @@ -48,6 +53,11 @@ select m['k1'], m['k2'], m['k3'], m['k4'] from t2 ['2020-01-01','2021-01-02'] ['2022-01-01'] NULL NULL NULL NULL ['2023-01-01'] NULL +query IT +select * from t2 where m['k3'] = ['2023-01-01'::date] +---- +2 {'k3':['2023-01-01']} + statement error 1001 CREATE TABLE IF NOT EXISTS t3(id Int, m Map(Array(Date), String)) Engine = Fuse