From 502975e80066845b6ac40c08d39bddf6e090a5c2 Mon Sep 17 00:00:00 2001 From: baishen Date: Mon, 4 Apr 2022 00:47:52 +0800 Subject: [PATCH] feat(function): Support Semi-structured function get, get_ignore_case, get_path --- Cargo.lock | 1 + common/datavalues/src/columns/column.rs | 3 + common/datavalues/src/types/type_id.rs | 18 ++ common/functions/Cargo.toml | 1 + .../src/scalars/semi_structureds/get.rs | 251 ++++++++++++++++++ .../src/scalars/semi_structureds/mod.rs | 4 + .../semi_structureds/semi_structured.rs | 6 + .../tests/it/scalars/semi_structureds/get.rs | 143 ++++++++++ .../tests/it/scalars/semi_structureds/mod.rs | 1 + .../110-semi-structured-functions/get.md | 60 +++++ .../get_ignore_case.md | 39 +++ .../110-semi-structured-functions/get_path.md | 65 +++++ ..._0050_function_string_regexp_instr.result} | 0 ... 02_0050_function_string_regexp_instr.sql} | 0 ..._0051_function_semi_structureds_get.result | 30 +++ .../02_0051_function_semi_structureds_get.sql | 50 ++++ 16 files changed, 672 insertions(+) create mode 100644 common/functions/src/scalars/semi_structureds/get.rs create mode 100644 common/functions/tests/it/scalars/semi_structureds/get.rs create mode 100644 docs/doc/30-reference/20-functions/110-semi-structured-functions/get.md create mode 100644 docs/doc/30-reference/20-functions/110-semi-structured-functions/get_ignore_case.md create mode 100644 docs/doc/30-reference/20-functions/110-semi-structured-functions/get_path.md rename tests/suites/0_stateless/02_function/{02_0049_function_string_regexp_instr.result => 02_0050_function_string_regexp_instr.result} (100%) rename tests/suites/0_stateless/02_function/{02_0049_function_string_regexp_instr.sql => 02_0050_function_string_regexp_instr.sql} (100%) create mode 100644 tests/suites/0_stateless/02_function/02_0051_function_semi_structureds_get.result create mode 100644 tests/suites/0_stateless/02_function/02_0051_function_semi_structureds_get.sql diff --git a/Cargo.lock b/Cargo.lock index 
3d348934259a..0d20b6f71644 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -997,6 +997,7 @@ dependencies = [ "sha1", "sha2 0.10.2", "simdutf8", + "sqlparser", "strength_reduce", "twox-hash", "uuid", diff --git a/common/datavalues/src/columns/column.rs b/common/datavalues/src/columns/column.rs index 16b902a64f3e..71d25da542b0 100644 --- a/common/datavalues/src/columns/column.rs +++ b/common/datavalues/src/columns/column.rs @@ -256,6 +256,9 @@ impl std::fmt::Debug for dyn Column + '_ { Struct => { fmt_dyn!(col, StructColumn, f) }, + Variant | VariantArray | VariantObject => { + fmt_dyn!(col, JsonColumn, f) + } _ => { unimplemented!() } diff --git a/common/datavalues/src/types/type_id.rs b/common/datavalues/src/types/type_id.rs index 27054d539110..94cb92fe1368 100644 --- a/common/datavalues/src/types/type_id.rs +++ b/common/datavalues/src/types/type_id.rs @@ -182,6 +182,24 @@ impl TypeID { ) } + #[inline] + pub fn is_variant(&self) -> bool { + matches!( + self, + TypeID::Variant | TypeID::VariantArray | TypeID::VariantObject + ) + } + + #[inline] + pub fn is_variant_or_array(&self) -> bool { + matches!(self, TypeID::Variant | TypeID::VariantArray) + } + + #[inline] + pub fn is_variant_or_object(&self) -> bool { + matches!(self, TypeID::Variant | TypeID::VariantObject) + } + #[inline] pub fn numeric_byte_size(&self) -> Result { match self { diff --git a/common/functions/Cargo.toml b/common/functions/Cargo.toml index fc871a87b386..75d0437da5e0 100644 --- a/common/functions/Cargo.toml +++ b/common/functions/Cargo.toml @@ -44,6 +44,7 @@ serde_json = "1.0.79" sha1 = "0.10.1" sha2 = "0.10.2" simdutf8 = "0.1.4" +sqlparser = { git = "https://github.com/datafuse-extras/sqlparser-rs", rev = "1c8d3f1" } strength_reduce = "0.2.3" twox-hash = "1.6.2" uuid = { version = "0.8.2", features = ["v4"] } diff --git a/common/functions/src/scalars/semi_structureds/get.rs b/common/functions/src/scalars/semi_structureds/get.rs new file mode 100644 index 000000000000..54b8f163eb5c --- /dev/null 
+++ b/common/functions/src/scalars/semi_structureds/get.rs
@@ -0,0 +1,251 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt;
+use std::sync::Arc;
+
+use common_datavalues::prelude::*;
+use common_exception::ErrorCode;
+use common_exception::Result;
+use serde_json::Value as JsonValue;
+use sqlparser::ast::Value;
+use sqlparser::dialect::GenericDialect;
+use sqlparser::parser::Parser;
+use sqlparser::tokenizer::Tokenizer;
+
+use crate::scalars::Function;
+use crate::scalars::FunctionDescription;
+use crate::scalars::FunctionFeatures;
+
+/// `GET`: single-key lookup with exact field-name matching.
+pub type GetFunction = GetFunctionImpl<false, false>;
+
+/// `GET_IGNORE_CASE`: single-key lookup with a case-insensitive fallback.
+pub type GetIgnoreCaseFunction = GetFunctionImpl<false, true>;
+
+/// `GET_PATH`: lookup through a parsed multi-key path expression.
+pub type GetPathFunction = GetFunctionImpl<true, false>;
+
+/// Shared implementation; `BY_PATH` parses the second argument as a path expression and
+/// `IGNORE_CASE` enables the case-insensitive fallback for object field names.
+#[derive(Clone)]
+pub struct GetFunctionImpl<const BY_PATH: bool, const IGNORE_CASE: bool> {
+    display_name: String,
+}
+
+impl<const BY_PATH: bool, const IGNORE_CASE: bool> GetFunctionImpl<BY_PATH, IGNORE_CASE> {
+    /// Creates the boxed function; `display_name` is reported by `name()` and `Display`.
+    pub fn try_create(display_name: &str) -> Result<Box<dyn Function>> {
+        Ok(Box::new(GetFunctionImpl::<BY_PATH, IGNORE_CASE> {
+            display_name: display_name.to_string(),
+        }))
+    }
+
+    /// Describes the function to the factory: deterministic, exactly two arguments.
+    pub fn desc() -> FunctionDescription {
+        FunctionDescription::creator(Box::new(Self::try_create))
+            .features(FunctionFeatures::default().deterministic().num_arguments(2))
+    }
+}
+
+impl<const BY_PATH: bool, const IGNORE_CASE: bool> Function
+    for GetFunctionImpl<BY_PATH, IGNORE_CASE>
+{
+    fn name(&self) -> &str {
+        &*self.display_name
+    }
+
+    /// Returns `Nullable(Variant)` once the argument types are validated: the first must
+    /// be a variant type (`Variant`/`VariantObject` only when `IGNORE_CASE`), the second
+    /// a string -- plain `GET` (neither flag set) also accepts an unsigned integer.
+    fn return_type(&self, args: &[&DataTypePtr]) -> Result<DataTypePtr> {
+        let data_type = args[0];
+        let path_type = args[1];
+        let data_id = data_type.data_type_id();
+        let path_id = path_type.data_type_id();
+
+        let data_ok = if IGNORE_CASE {
+            data_id.is_variant_or_object()
+        } else {
+            data_id.is_variant()
+        };
+        let path_ok =
+            path_id.is_string() || (!BY_PATH && !IGNORE_CASE && path_id.is_unsigned_integer());
+
+        if !data_ok || !path_ok {
+            return Err(ErrorCode::IllegalDataType(format!(
+                "Invalid argument types for function '{}': ({:?}, {:?})",
+                self.display_name.to_uppercase(),
+                data_type,
+                path_type
+            )));
+        }
+
+        Ok(Arc::new(NullableType::create(VariantType::arc())))
+    }
+
+    /// Turns the second argument into path keys, then extracts from the first argument.
+    fn eval(&self, columns: &ColumnsWithField, input_rows: usize) -> Result<ColumnRef> {
+        let path_keys = if BY_PATH {
+            parse_path_keys(columns[1].column())?
+        } else {
+            build_path_keys(columns[1].column())?
+        };
+
+        extract_value_by_path(columns[0].column(), path_keys, input_rows, IGNORE_CASE)
+    }
+}
+
+impl<const BY_PATH: bool, const IGNORE_CASE: bool> fmt::Display
+    for GetFunctionImpl<BY_PATH, IGNORE_CASE>
+{
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.display_name.to_uppercase())
+    }
+}
+
+/// Parses every row of the string `column` into a list of path keys.
+///
+/// Numbers are converted with `DataValue::try_from_literal`; quoted, colon and period
+/// strings become `DataValue::String`; any other parsed value becomes `DataValue::Null`.
+/// Fails with `SyntaxException` on an empty path, non-UTF-8 bytes, a tokenizer failure or
+/// an unconvertible number; parser errors are converted with `ErrorCode::from`.
+fn parse_path_keys(column: &ColumnRef) -> Result<Vec<Vec<DataValue>>> {
+    let column: &StringColumn = if column.is_const() {
+        let const_column: &ConstColumn = Series::check_get(column)?;
+        Series::check_get(const_column.inner())?
+    } else {
+        Series::check_get(column)?
+    };
+
+    let dialect = &GenericDialect {};
+    let mut path_keys: Vec<Vec<DataValue>> = vec![];
+    for v in column.iter() {
+        if v.is_empty() {
+            return Err(ErrorCode::SyntaxException(
+                "Bad compound object's field path name: '' in GET_PATH",
+            ));
+        }
+        // Path bytes come from query data: report bad UTF-8 instead of panicking.
+        let definition = std::str::from_utf8(v).map_err(|utf8_error| {
+            ErrorCode::SyntaxException(format!("Invalid UTF-8 in path name: {:?}", utf8_error))
+        })?;
+        let mut tokenizer = Tokenizer::new(dialect, definition);
+        let (tokens, position_map) = tokenizer.tokenize().map_err(|tokenize_error| {
+            ErrorCode::SyntaxException(format!(
+                "Can not tokenize definition: {}, Error: {:?}",
+                definition, tokenize_error
+            ))
+        })?;
+        let values = Parser::new(tokens, position_map, dialect).parse_map_keys()?;
+        let path_key = values
+            .iter()
+            .map(|value| match value {
+                Value::Number(number, _) => {
+                    DataValue::try_from_literal(number, None).map_err(|literal_error| {
+                        ErrorCode::SyntaxException(format!(
+                            "Can not convert path key: {}, Error: {:?}",
+                            number, literal_error
+                        ))
+                    })
+                }
+                Value::SingleQuotedString(key)
+                | Value::ColonString(key)
+                | Value::PeriodString(key) => Ok(DataValue::String(key.clone().into_bytes())),
+                _ => Ok(DataValue::Null),
+            })
+            .collect::<Result<Vec<DataValue>>>()?;
+        path_keys.push(path_key);
+    }
+    Ok(path_keys)
+}
+
+/// Wraps every row of `column` into a one-key path, unwrapping a constant column first.
+fn build_path_keys(column: &ColumnRef) -> Result<Vec<Vec<DataValue>>> {
+    if column.is_const() {
+        let const_column: &ConstColumn = Series::check_get(column)?;
+        return build_path_keys(const_column.inner());
+    }
+
+    Ok((0..column.len()).map(|i| vec![column.get(i)]).collect())
+}
+
+/// Looks up every path of `path_keys` in every JSON value of `column`; paths form the outer
+/// loop and values the inner one, so the builder receives `path_keys.len() * column.len()`
+/// entries, with NULL appended for every lookup that misses.
+// NOTE(review): two non-constant arguments of N rows each therefore produce N * N
+// entries -- confirm that this shape cannot reach here or that `build` handles it.
+fn extract_value_by_path(
+    column: &ColumnRef,
+    path_keys: Vec<Vec<DataValue>>,
+    input_rows: usize,
+    ignore_case: bool,
+) -> Result<ColumnRef> {
+    let column: &JsonColumn = if column.is_const() {
+        let const_column: &ConstColumn = Series::check_get(column)?;
+        Series::check_get(const_column.inner())?
+    } else {
+        Series::check_get(column)?
+    };
+
+    let mut builder = NullableColumnBuilder::<JsonValue>::with_capacity(input_rows);
+    for path_key in path_keys.iter() {
+        if path_key.is_empty() {
+            for _ in 0..column.len() {
+                builder.append_null();
+            }
+            continue;
+        }
+        for v in column.iter() {
+            if let Some(value) = lookup_path(v, path_key, ignore_case) {
+                builder.append(value, true);
+            } else {
+                builder.append_null();
+            }
+        }
+    }
+    Ok(builder.build(input_rows))
+}
+
+/// Follows `path_key` from `root`; returns `None` as soon as one key does not resolve.
+fn lookup_path<'a>(
+    root: &'a JsonValue,
+    path_key: &[DataValue],
+    ignore_case: bool,
+) -> Option<&'a JsonValue> {
+    let mut value = root;
+    for key in path_key {
+        value = match key {
+            DataValue::UInt64(index) => value.get(*index as usize)?,
+            DataValue::String(raw_key) => {
+                let field = std::str::from_utf8(raw_key).ok()?;
+                match value.get(field) {
+                    Some(child_value) => child_value,
+                    // No exact match: take the first field that is equal ignoring case.
+                    None if ignore_case => {
+                        let lowered = field.to_lowercase();
+                        value
+                            .as_object()?
+                            .iter()
+                            .find(|(child_key, _)| child_key.to_lowercase() == lowered)
+                            .map(|(_, child_value)| child_value)?
+                    }
+                    None => return None,
+                }
+            }
+            _ => return None,
+        };
+    }
+    Some(value)
+}
diff --git a/common/functions/src/scalars/semi_structureds/mod.rs b/common/functions/src/scalars/semi_structureds/mod.rs
index 31abb3845fe4..e37f30d56985 100644
--- a/common/functions/src/scalars/semi_structureds/mod.rs
+++ b/common/functions/src/scalars/semi_structureds/mod.rs
@@ -13,10 +13,14 @@
 // limitations under the License.
mod check_json; +mod get; mod parse_json; mod semi_structured; pub use check_json::CheckJsonFunction; +pub use get::GetFunction; +pub use get::GetIgnoreCaseFunction; +pub use get::GetPathFunction; pub use parse_json::ParseJsonFunction; pub use parse_json::TryParseJsonFunction; pub use semi_structured::SemiStructuredFunction; diff --git a/common/functions/src/scalars/semi_structureds/semi_structured.rs b/common/functions/src/scalars/semi_structureds/semi_structured.rs index fbeac93a3f0e..7bef21b09734 100644 --- a/common/functions/src/scalars/semi_structureds/semi_structured.rs +++ b/common/functions/src/scalars/semi_structureds/semi_structured.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use super::get::GetFunction; +use super::get::GetIgnoreCaseFunction; +use super::get::GetPathFunction; use super::parse_json::ParseJsonFunction; use super::parse_json::TryParseJsonFunction; use crate::scalars::CheckJsonFunction; @@ -24,5 +27,8 @@ impl SemiStructuredFunction { factory.register("parse_json", ParseJsonFunction::desc()); factory.register("try_parse_json", TryParseJsonFunction::desc()); factory.register("check_json", CheckJsonFunction::desc()); + factory.register("get", GetFunction::desc()); + factory.register("get_ignore_case", GetIgnoreCaseFunction::desc()); + factory.register("get_path", GetPathFunction::desc()); } } diff --git a/common/functions/tests/it/scalars/semi_structureds/get.rs b/common/functions/tests/it/scalars/semi_structureds/get.rs new file mode 100644 index 000000000000..af571370a335 --- /dev/null +++ b/common/functions/tests/it/scalars/semi_structureds/get.rs @@ -0,0 +1,143 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_exception::Result; +use common_functions::scalars::GetFunction; +use common_functions::scalars::GetIgnoreCaseFunction; +use common_functions::scalars::GetPathFunction; +use serde_json::json; + +use crate::scalars::scalar_function2_test::test_scalar_functions; +use crate::scalars::scalar_function2_test::ScalarFunctionTest; + +#[test] +fn test_get_function() -> Result<()> { + use common_datavalues::prelude::*; + + let tests = vec![ + ScalarFunctionTest { + name: "get_by_field_name", + columns: vec![ + Series::from_data(vec![ + json!({"a":1_i32,"b":2_i32}), + json!({"A":3_i32,"B":4_i32}), + json!([1_i32, 2, 3]), + ]), + Series::from_data(vec!["a"]), + ], + expect: Series::from_data(vec![Some(json!(1_i32)), None, None]), + error: "", + }, + ScalarFunctionTest { + name: "get_by_index", + columns: vec![ + Series::from_data(vec![ + json!([0_i32, 1, 2]), + json!(["\"a\"", "\"b\"", "\"c\""]), + json!({"key":"val"}), + ]), + Series::from_data(vec![0_u32, 1_u32]), + ], + expect: Series::from_data(vec![ + Some(json!(0_i32)), + Some(json!("\"a\"")), + None, + Some(json!(1_i32)), + Some(json!("\"b\"")), + None, + ]), + error: "", + }, + ScalarFunctionTest { + name: "get_by_field_name_error_type", + columns: vec![ + Series::from_data(vec!["abc", "123"]), + Series::from_data(vec![0_i32]), + ], + expect: Series::from_data(vec![None::<&str>, None::<&str>]), + error: "Invalid argument types for function 'GET': (String, Int32)", + }, + ]; + + test_scalar_functions(GetFunction::try_create("get")?, &tests, false) +} + +#[test] +fn 
test_get_ignore_case_function() -> Result<()> { + use common_datavalues::prelude::*; + + let tests = vec![ + ScalarFunctionTest { + name: "get_by_field_name", + columns: vec![ + Series::from_data(vec![ + json!({"aa":1_i32, "aA":2, "Aa":3}), + json!([1_i32, 2, 3]), + ]), + Series::from_data(vec!["aA", "AA"]), + ], + expect: Series::from_data(vec![Some(json!(2_i32)), None, Some(json!(1_i32)), None]), + error: "", + }, + ScalarFunctionTest { + name: "get_by_field_name_error_type", + columns: vec![ + Series::from_data(vec!["abc", "123"]), + Series::from_data(vec![0_i32]), + ], + expect: Series::from_data(vec![None::<&str>, None::<&str>]), + error: "Invalid argument types for function 'GET_IGNORE_CASE': (String, Int32)", + }, + ]; + + test_scalar_functions( + GetIgnoreCaseFunction::try_create("get_ignore_case")?, + &tests, + false, + ) +} + +#[test] +fn test_get_path_function() -> Result<()> { + use common_datavalues::prelude::*; + + let tests = vec![ + ScalarFunctionTest { + name: "get_by_path", + columns: vec![ + Series::from_data(vec![json!({"a":[[1_i32],[2_i32]],"o":{"p":{"q":"r"}}})]), + Series::from_data(vec!["a[0][0]", "a.b", "o.p:q", "o['p']['q']", "o[0]"]), + ], + expect: Series::from_data(vec![ + Some(json!(1_i32)), + None, + Some(json!("r")), + Some(json!("r")), + None, + ]), + error: "", + }, + ScalarFunctionTest { + name: "get_by_path_error_type", + columns: vec![ + Series::from_data(vec!["abc", "123"]), + Series::from_data(vec![0_i32]), + ], + expect: Series::from_data(vec![None::<&str>, None::<&str>]), + error: "Invalid argument types for function 'GET_PATH': (String, Int32)", + }, + ]; + + test_scalar_functions(GetPathFunction::try_create("get_path")?, &tests, false) +} diff --git a/common/functions/tests/it/scalars/semi_structureds/mod.rs b/common/functions/tests/it/scalars/semi_structureds/mod.rs index 82da6ba8fcd8..ac230989db57 100644 --- a/common/functions/tests/it/scalars/semi_structureds/mod.rs +++ 
b/common/functions/tests/it/scalars/semi_structureds/mod.rs @@ -13,4 +13,5 @@ // limitations under the License. mod check_json; +mod get; mod parse_json; diff --git a/docs/doc/30-reference/20-functions/110-semi-structured-functions/get.md b/docs/doc/30-reference/20-functions/110-semi-structured-functions/get.md new file mode 100644 index 000000000000..87bec1e73483 --- /dev/null +++ b/docs/doc/30-reference/20-functions/110-semi-structured-functions/get.md @@ -0,0 +1,60 @@ +--- +title: Get +--- + +Extracts value from an `ARRAY` by `index`, an `OBJECT` by `field_name`, or a `VARIANT` that contains either `ARRAY` or `OBJECT`. +The value is returned as a `Variant` or `NULL` if either of the arguments is `NULL`. + +`GET` applies case-sensitive matching to `field_name`. For case-insensitive matching, use `GET_IGNORE_CASE`. + +## Syntax + +```sql +get(array, index) +get(variant, index) + +get(object, field_name) +get(variant, field_name) +``` + +## Arguments + +| Arguments | Description | +| ----------- | ----------- | +| array | The ARRAY value +| object | The OBJECT value +| variant | The VARIANT value that contains either an ARRAY or an OBJECT +| index | The Uint32 value specifies the position of the value in ARRAY +| field_name | The String value specifies the key in a key-value pair of OBJECT + +## Return Type + +Variant + +## Examples + +```sql +mysql> select get(parse_json('[2.71, 3.14]'), 0); ++------------------------------------+ +| get(parse_json('[2.71, 3.14]'), 0) | ++------------------------------------+ +| 2.71 | ++------------------------------------+ +1 row in set (0.01 sec) + +mysql> select get(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'aa'); ++---------------------------------------------------+ +| get(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'aa') | ++---------------------------------------------------+ +| 1 | ++---------------------------------------------------+ +1 row in set (0.01 sec) + +mysql> select get(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'AA'); 
++---------------------------------------------------+ +| get(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'AA') | ++---------------------------------------------------+ +| NULL | ++---------------------------------------------------+ +1 row in set (0.01 sec) +``` diff --git a/docs/doc/30-reference/20-functions/110-semi-structured-functions/get_ignore_case.md b/docs/doc/30-reference/20-functions/110-semi-structured-functions/get_ignore_case.md new file mode 100644 index 000000000000..e5e852d69efd --- /dev/null +++ b/docs/doc/30-reference/20-functions/110-semi-structured-functions/get_ignore_case.md @@ -0,0 +1,39 @@ +--- +title: Get Ignore Case +--- + +Extracts value from an `OBJECT` by `field_name`, or a `VARIANT` that contains `OBJECT`. +The value is returned as a `Variant` or `NULL` if either of the arguments is `NULL`. + +`GET_IGNORE_CASE` is similar to `GET` but applies case-insensitive matching to field names. + +## Syntax + +```sql +get_ignore_case(object, field_name) +get_ignore_case(variant, field_name) +``` + +## Arguments + +| Arguments | Description | +| ----------- | ----------- | +| object | The OBJECT value +| variant | The VARIANT value that contains an OBJECT +| field_name | The String value that specifies the key in a key-value pair of OBJECT + +## Return Type + +Variant + +## Examples + +```sql +mysql> select get_ignore_case(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'AA'); ++---------------------------------------------------------------+ +| get_ignore_case(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'AA') | ++---------------------------------------------------------------+ +| 1 | ++---------------------------------------------------------------+ +1 row in set (0.01 sec) +``` diff --git a/docs/doc/30-reference/20-functions/110-semi-structured-functions/get_path.md b/docs/doc/30-reference/20-functions/110-semi-structured-functions/get_path.md new file mode 100644 index 000000000000..394a30df399c --- /dev/null +++ 
b/docs/doc/30-reference/20-functions/110-semi-structured-functions/get_path.md @@ -0,0 +1,65 @@ +--- +title: Get Path +--- + +Extracts value from an `ARRAY`, an `OBJECT`, or a `VARIANT` by `path_name`. +The value is returned as a `Variant` or `NULL` if either of the arguments is `NULL`. + +`GET_PATH` is equivalent to a chain of `GET` functions. `path_name` is a concatenation of field names, each preceded by a period (.) or a colon (:), and index operators (`[index]`). The first field name does not require a leading period or colon. + +## Syntax + +```sql +get_path(array, path_name) +get_path(object, path_name) +get_path(variant, path_name) +``` + +## Arguments + +| Arguments | Description | +| ----------- | ----------- | +| array | The ARRAY value +| object | The OBJECT value +| variant | The VARIANT value that contains either an ARRAY or an OBJECT +| path_name | The String value that consists of a concatenation of field names + +## Return Type + +Variant + +## Examples + +```sql +mysql> select get_path(parse_json('{"k1":[0,1,2], "k2":{"k3":3,"k4":4}}'), 'k1[0]'); ++-----------------------------------------------------------------------+ +| get_path(parse_json('{"k1":[0,1,2], "k2":{"k3":3,"k4":4}}'), 'k1[0]') | ++-----------------------------------------------------------------------+ +| 0 | ++-----------------------------------------------------------------------+ +1 row in set (0.01 sec) + +mysql> select get_path(parse_json('{"k1":[0,1,2], "k2":{"k3":3,"k4":4}}'), 'k2:k3'); ++-----------------------------------------------------------------------+ +| get_path(parse_json('{"k1":[0,1,2], "k2":{"k3":3,"k4":4}}'), 'k2:k3') | ++-----------------------------------------------------------------------+ +| 3 | ++-----------------------------------------------------------------------+ +1 row in set (0.01 sec) + +mysql> select get_path(parse_json('{"k1":[0,1,2], "k2":{"k3":3,"k4":4}}'), 'k2.k4'); 
++-----------------------------------------------------------------------+ +| get_path(parse_json('{"k1":[0,1,2], "k2":{"k3":3,"k4":4}}'), 'k2.k4') | ++-----------------------------------------------------------------------+ +| 4 | ++-----------------------------------------------------------------------+ +1 row in set (0.02 sec) + +mysql> select get_path(parse_json('{"k1":[0,1,2], "k2":{"k3":3,"k4":4}}'), 'k2.k5'); ++-----------------------------------------------------------------------+ +| get_path(parse_json('{"k1":[0,1,2], "k2":{"k3":3,"k4":4}}'), 'k2.k5') | ++-----------------------------------------------------------------------+ +| NULL | ++-----------------------------------------------------------------------+ +1 row in set (0.03 sec) +``` diff --git a/tests/suites/0_stateless/02_function/02_0049_function_string_regexp_instr.result b/tests/suites/0_stateless/02_function/02_0050_function_string_regexp_instr.result similarity index 100% rename from tests/suites/0_stateless/02_function/02_0049_function_string_regexp_instr.result rename to tests/suites/0_stateless/02_function/02_0050_function_string_regexp_instr.result diff --git a/tests/suites/0_stateless/02_function/02_0049_function_string_regexp_instr.sql b/tests/suites/0_stateless/02_function/02_0050_function_string_regexp_instr.sql similarity index 100% rename from tests/suites/0_stateless/02_function/02_0049_function_string_regexp_instr.sql rename to tests/suites/0_stateless/02_function/02_0050_function_string_regexp_instr.sql diff --git a/tests/suites/0_stateless/02_function/02_0051_function_semi_structureds_get.result b/tests/suites/0_stateless/02_function/02_0051_function_semi_structureds_get.result new file mode 100644 index 000000000000..0296176f213b --- /dev/null +++ b/tests/suites/0_stateless/02_function/02_0051_function_semi_structureds_get.result @@ -0,0 +1,30 @@ +==get== +2.71 +NULL +2 +NULL +==get_ignore_case== +2 +1 +==get_path== +1 +2 +1 +"databend" +"ext" +NULL +==get from table== +1 +NULL 
+NULL +1 +==get_ignore_case from table== +1 +1 +==get_path from table== +1 +"a" +1 +1 +2 +2 diff --git a/tests/suites/0_stateless/02_function/02_0051_function_semi_structureds_get.sql b/tests/suites/0_stateless/02_function/02_0051_function_semi_structureds_get.sql new file mode 100644 index 000000000000..8d5049076c67 --- /dev/null +++ b/tests/suites/0_stateless/02_function/02_0051_function_semi_structureds_get.sql @@ -0,0 +1,50 @@ +select '==get=='; +select get(parse_json('[2.71, 3.14]'), 0); +select get(parse_json('[2.71, 3.14]'), 2); +select get(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'aA'); +select get(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'AA'); + +select '==get_ignore_case=='; +select get_ignore_case(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'aA'); +select get_ignore_case(parse_json('{"aa":1, "aA":2, "Aa":3}'), 'AA'); + +select '==get_path=='; +select get_path(parse_json('{"attr":[{"name":1}, {"name":2}]}'), 'attr[0].name'); +select get_path(parse_json('{"attr":[{"name":1}, {"name":2}]}'), 'attr[1]:name'); +select get_path(parse_json('{"customer":{"id":1, "name":"databend", "extras":["ext", "test"]}}'), 'customer:id'); +select get_path(parse_json('{"customer":{"id":1, "name":"databend", "extras":["ext", "test"]}}'), 'customer.name'); +select get_path(parse_json('{"customer":{"id":1, "name":"databend", "extras":["ext", "test"]}}'), 'customer["extras"][0]'); +select get_path(parse_json('{"customer":{"id":1, "name":"databend", "extras":["ext", "test"]}}'), 'customer["extras"][2]'); +select get_path(parse_json('{"customer":{"id":1, "name":"databend", "extras":["ext", "test"]}}'), ''); -- {ErrorCode 1005} + +DROP DATABASE IF EXISTS db1; +CREATE DATABASE db1; +USE db1; + +CREATE TABLE IF NOT EXISTS t1(id Int null, arr Array null) Engine = Memory; + +insert into t1 select 1, parse_json('[1,2,3,["a","b","c"]]'); + +CREATE TABLE IF NOT EXISTS t2(id Int null, obj Object null) Engine = Memory; + +insert into t2 select 1, parse_json('{"a":1,"b":{"c":2}}'); + +select 
'==get from table=='; +select get(arr, 0) from t1; +select get(arr, 'a') from t1; +select get(obj, 0) from t2; +select get(obj, 'a') from t2; + +select '==get_ignore_case from table=='; +select get_ignore_case(obj, 'a') from t2; +select get_ignore_case(obj, 'A') from t2; + +select '==get_path from table=='; +select get_path(arr, '[0]') from t1; +select get_path(arr, '[3][0]') from t1; +select get_path(obj, 'a') from t2; +select get_path(obj, '["a"]') from t2; +select get_path(obj, 'b.c') from t2; +select get_path(obj, '["b"]["c"]') from t2; + +DROP DATABASE db1;