From ee1f9c914b5f84bf83e1edfd06563ea2eb34c588 Mon Sep 17 00:00:00 2001 From: baishen Date: Sat, 22 Apr 2023 07:54:29 +0800 Subject: [PATCH] feat(function): Support `json_path_query` using JSON path (#11142) * feat(function): Support `json_path_query` using JSON path * fix tests * fix tests * fix --- Cargo.lock | 33 +- Cargo.toml | 4 +- src/query/expression/src/evaluator.rs | 13 +- src/query/expression/src/function.rs | 7 +- src/query/functions/src/scalars/variant.rs | 634 +++++++++++------- src/query/functions/src/srfs/mod.rs | 74 +- .../it/scalars/testdata/function_list.txt | 35 +- .../tests/it/scalars/testdata/variant.txt | 239 +++++-- .../functions/tests/it/scalars/variant.rs | 96 +++ .../02_0051_function_semi_structureds_get | 122 +++- 10 files changed, 945 insertions(+), 312 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85ae7dfb01932..895b81d0536fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1651,7 +1651,7 @@ dependencies = [ "micromarshal", "num-traits", "once_cell", - "ordered-float 3.4.0", + "ordered-float 3.6.0", "pretty_assertions", "rand 0.8.5", "rust_decimal", @@ -1678,7 +1678,7 @@ dependencies = [ "match-template", "micromarshal", "num", - "ordered-float 3.4.0", + "ordered-float 3.6.0", "pretty_assertions", "serde_json", "storages-common-blocks", @@ -1724,7 +1724,7 @@ dependencies = [ "naive-cityhash", "num-traits", "once_cell", - "ordered-float 3.4.0", + "ordered-float 3.6.0", "rand 0.8.5", "regex", "serde", @@ -1763,7 +1763,7 @@ dependencies = [ "cfg-if", "common-base", "ethnum", - "ordered-float 3.4.0", + "ordered-float 3.6.0", "rand 0.8.5", ] @@ -1805,7 +1805,7 @@ dependencies = [ "ethnum", "lexical-core", "micromarshal", - "ordered-float 3.4.0", + "ordered-float 3.6.0", "rand 0.8.5", "serde", ] @@ -2269,7 +2269,7 @@ dependencies = [ "num-traits", "once_cell", "opendal", - "ordered-float 3.4.0", + "ordered-float 3.6.0", "parking_lot 0.12.1", "percent-encoding", "regex", @@ -2393,7 +2393,7 @@ dependencies = [ "databend-thrift", "futures", "opendal", - "ordered-float 3.4.0", + "ordered-float 3.6.0", "serde", "storages-common-cache", "storages-common-cache-manager", @@ -3371,7 +3371,7 @@ dependencies = [ "once_cell", "opendal", "opensrv-mysql", - "ordered-float 3.4.0", + "ordered-float 3.6.0", "p256 0.13.0", "parking_lot 0.12.1", "paste", @@ -5889,13 +5889,14 @@ dependencies = [ [[package]] name = "jsonb" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd7cd23238fe597fd093ca5d3efc50fc1e805027a63220838d0c5cb9c03df897" +checksum = "3ed36ed96d7d9364e3a386fe2a95ceec46bdbcd6676384cdf5563f9618c73ed0" dependencies = [ "byteorder", "fast-float", - "ordered-float 3.4.0", + "nom 7.1.3", + "ordered-float 3.6.0", "serde", "serde_json", ] @@ -6402,7 +6403,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2777e7006e9b80afb5d2797e2b9e23865451708c9e280a61253586a0dd3a5db1" dependencies = [ "ethnum", - "ordered-float 3.4.0", + "ordered-float 3.6.0", ] [[package]] @@ -7080,9 +7081,9 @@ dependencies = [ [[package]] name = "ordered-float" -version = "3.4.0" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d84eb1409416d254e4a9c8fa56cc24701755025b458f0fcd8e59e1f5f40c23bf" +checksum = "13a384337e997e6860ffbaa83708b2ef329fd8c54cb67a5f64d421e0f943254f" dependencies = [ "num-traits", "rand 0.8.5", @@ -8850,9 +8851,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.94" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ "indexmap", "itoa", diff --git a/Cargo.toml b/Cargo.toml index 5c35bbcf82b21..98bb7319a03a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -101,8 +101,8 @@ opendal = { version = "0.32", features = [ "services-redis", "trust-dns", ] } -ordered-float = { version = "3.4.0", default-features = false } -jsonb = { version = "0.1.1" } +ordered-float = { version = "3.6.0", default-features = false } +jsonb = { version = "0.2.0" } # openraft = { version = "0.8.2", features = ["compat-07"] } # For debugging diff --git a/src/query/expression/src/evaluator.rs b/src/query/expression/src/evaluator.rs index 4a55a933a4f43..6def71a857d5e 100644 --- a/src/query/expression/src/evaluator.rs +++ b/src/query/expression/src/evaluator.rs @@ -881,9 +881,11 @@ impl<'a> Evaluator<'a> { /// for each input row, along with the number of rows in each set. pub fn run_srf(&self, expr: &Expr) -> Result, usize)>> { if let Expr::FunctionCall { + span, function, args, return_type, + generics, .. } = expr { @@ -894,7 +896,16 @@ impl<'a> Evaluator<'a> { .map(|expr| self.run(expr)) .collect::>>()?; let cols_ref = args.iter().map(Value::as_ref).collect::>(); - let result = (eval)(&cols_ref, self.input_columns.num_rows()); + let mut ctx = EvalContext { + generics, + num_rows: self.input_columns.num_rows(), + validity: None, + errors: None, + tz: self.func_ctx.tz, + func_ctx: self.func_ctx, + }; + let result = (eval)(&cols_ref, &mut ctx); + ctx.render_error(*span, &args, &function.signature.name)?; assert_eq!(result.len(), self.input_columns.num_rows()); return Ok(result); } diff --git a/src/query/expression/src/function.rs b/src/query/expression/src/function.rs index a5e4f417f2a7d..5941e05f0e1a9 100755 --- a/src/query/expression/src/function.rs +++ b/src/query/expression/src/function.rs @@ -77,8 +77,11 @@ pub enum FunctionEval { SRF { /// Given multiple rows, return multiple sets of results /// for each input row, along with the number of rows in each set. - eval: - Box], usize) -> Vec<(Value, usize)> + Send + Sync>, + eval: Box< + dyn Fn(&[ValueRef], &mut EvalContext) -> Vec<(Value, usize)> + + Send + + Sync, + >, }, } diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index 69964b16a8f69..5fa7730d9220f 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::borrow::Cow; use std::collections::HashSet; use std::sync::Arc; @@ -27,7 +26,6 @@ use common_expression::types::string::StringColumnBuilder; use common_expression::types::timestamp::string_to_timestamp; use common_expression::types::variant::cast_scalar_to_variant; use common_expression::types::variant::cast_scalars_to_variants; -use common_expression::types::variant::JSONB_NULL; use common_expression::types::AnyType; use common_expression::types::BooleanType; use common_expression::types::DataType; @@ -42,7 +40,6 @@ use common_expression::types::VariantType; use common_expression::types::ALL_NUMERICS_TYPES; use common_expression::utils::arrow::constant_bitmap; use common_expression::vectorize_1_arg; -use common_expression::vectorize_2_arg; use common_expression::vectorize_with_builder_1_arg; use common_expression::vectorize_with_builder_2_arg; use common_expression::with_number_mapped_type; @@ -63,20 +60,25 @@ use jsonb::as_bool; use jsonb::as_f64; use jsonb::as_i64; use jsonb::as_str; +use jsonb::build_array; use jsonb::build_object; +use jsonb::get_by_index; +use jsonb::get_by_name; use jsonb::get_by_name_ignore_case; use jsonb::get_by_path; +use jsonb::get_by_path_array; +use jsonb::get_by_path_first; use jsonb::is_array; use jsonb::is_object; +use jsonb::jsonpath::parse_json_path; use jsonb::object_keys; -use jsonb::parse_json_path; use jsonb::parse_value; use jsonb::to_bool; use jsonb::to_f64; use jsonb::to_i64; use jsonb::to_str; +use jsonb::to_string; use jsonb::to_u64; -use jsonb::JsonPathRef; pub fn register(registry: &mut FunctionRegistry) { registry.register_aliases("json_object_keys", &["object_keys"]); @@ -85,10 +87,11 @@ pub fn register(registry: &mut FunctionRegistry) { "parse_json", |_| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::(|s, output, ctx| { - if s.trim().is_empty() { - output.put_slice(JSONB_NULL); - output.commit_row(); - return; + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.commit_row(); + return; + } } match parse_value(s) { Ok(value) => { @@ -105,34 +108,38 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_combine_nullable_1_arg::( "try_parse_json", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|s, output, _| { - if s.trim().is_empty() { - output.push(JSONB_NULL); - } else { - match parse_value(s) { - Ok(value) => { - output.validity.push(true); - value.write_to_vec(&mut output.builder.data); - output.builder.commit_row(); - } - Err(_) => output.push_null(), + vectorize_with_builder_1_arg::>(|s, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } + match parse_value(s) { + Ok(value) => { + output.validity.push(true); + value.write_to_vec(&mut output.builder.data); + output.builder.commit_row(); + } + Err(_) => output.push_null(), + } }), ); registry.register_combine_nullable_1_arg::( "check_json", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|s, output, _| { - if s.trim().is_empty() { - output.push_null(); - } else { - match parse_value(s) { - Ok(_) => output.push_null(), - Err(e) => output.push(e.to_string().as_bytes()), + vectorize_with_builder_1_arg::>(|s, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } + match parse_value(s) { + Ok(_) => output.push_null(), + Err(e) => output.push(e.to_string().as_bytes()), + } }), ); @@ -152,117 +159,242 @@ pub fn register(registry: &mut FunctionRegistry) { }), ); - registry.register_2_arg_core::, NullableType, NullableType, _, _>( + registry.register_combine_nullable_2_arg::( "get", |_, _| FunctionDomain::MayThrow, - vectorize_2_arg::, NullableType, NullableType>(|val, name, _| { - match (val, name) { - (Some(val), Some(name)) => { - if val.is_empty() || name.trim().is_empty() { - None - } else { - let name = String::from_utf8(name.to_vec()).map_err(|err| { + vectorize_with_builder_2_arg::>( + |val, name, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match std::str::from_utf8(name) { + Ok(name) => match get_by_name(val, name) { + Some(v) => { + output.push(&v); + } + None => { + output.push_null(); + } + }, + Err(err) => { + ctx.set_error( + output.len(), format!( "Unable convert name '{}' to string: {}", &String::from_utf8_lossy(name), err - ) - }).ok()?; - let json_path = JsonPathRef::String(Cow::Borrowed(&name)); - get_by_path(val, vec![json_path]) + ), + ); + output.push_null(); } } - (_, _) => None, - } - }), + }, + ), ); - registry.register_2_arg_core::, NullableType, NullableType, _, _>( + registry.register_combine_nullable_2_arg::( "get", |_, _| FunctionDomain::Full, - vectorize_2_arg::, NullableType, NullableType>(|val, idx, _| { - match (val, idx) { - (Some(val), Some(idx)) => { - if val.is_empty() { - None - } else { - let json_path = JsonPathRef::UInt64(idx); - get_by_path(val, vec![json_path]) + vectorize_with_builder_2_arg::>( + |val, idx, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } - (_, _) => None, - } - }), + if idx < 0 || idx > i32::MAX.into() { + output.push_null(); + } else { + match get_by_index(val, idx as i32) { + Some(v) => { + output.push(&v); + } + None => { + output.push_null(); + } + } + } + }, + ), ); - registry.register_2_arg_core::, NullableType, NullableType, _, _>( + registry.register_combine_nullable_2_arg::( "get_ignore_case", |_, _| FunctionDomain::MayThrow, - vectorize_2_arg::, NullableType, NullableType>(|val, name, _| { - match (val, name) { - (Some(val), Some(name)) => { - if val.is_empty() || name.trim().is_empty() { - None - } else { - let name = String::from_utf8(name.to_vec()).map_err(|err| { + vectorize_with_builder_2_arg::>( + |val, name, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match std::str::from_utf8(name) { + Ok(name) => match get_by_name_ignore_case(val, name) { + Some(v) => output.push(&v), + None => output.push_null(), + }, + Err(err) => { + ctx.set_error( + output.len(), format!( "Unable convert name '{}' to string: {}", &String::from_utf8_lossy(name), err - ) - }).ok()?; - get_by_name_ignore_case(val, &name) + ), + ); + output.push_null(); } } - (_, _) => None, - } - }), + }, + ), ); - registry.register_2_arg_core::, NullableType, NullableType, _, _>( + registry.register_combine_nullable_2_arg::( + "json_path_query_array", + |_, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::>( + |val, path, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match parse_json_path(path) { + Ok(json_path) => match get_by_path_array(val, json_path) { + Some(v) => output.push(&v), + None => output.push_null(), + }, + Err(_) => { + ctx.set_error( + output.len(), + format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(path),), + ); + output.push_null(); + } + } + }, + ), + ); + + registry.register_combine_nullable_2_arg::( + "json_path_query_first", + |_, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::>( + |val, path, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match parse_json_path(path) { + Ok(json_path) => match get_by_path_first(val, json_path) { + Some(v) => output.push(&v), + None => output.push_null(), + }, + Err(_) => { + ctx.set_error( + output.len(), + format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(path),), + ); + output.push_null(); + } + } + }, + ), + ); + + registry.register_combine_nullable_2_arg::( "get_path", |_, _| FunctionDomain::MayThrow, - vectorize_2_arg::, NullableType, NullableType>(|val, path, _| { - match (val, path) { - (Some(val), Some(path)) => { - if val.is_empty() || path.is_empty() { - None - } else { - let json_paths = parse_json_path(path).map_err(|err| { - format!( - "Invalid extraction path '{}': {}", - &String::from_utf8_lossy(path), - err - ) - }).ok()?; - get_by_path(val, json_paths) + vectorize_with_builder_2_arg::>( + |val, path, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } - (_, _) => None, - } - }), + match parse_json_path(path) { + Ok(json_path) => { + let mut vals = get_by_path(val, json_path); + if vals.is_empty() { + output.push_null(); + } else if vals.len() == 1 { + let v = vals.remove(0); + output.push(&v); + } else { + let mut array_val = Vec::new(); + let items: Vec<_> = vals.iter().map(|v| v.as_slice()).collect(); + build_array(items, &mut array_val).unwrap(); + output.push(&array_val); + } + } + Err(_) => { + ctx.set_error( + output.len(), + format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(path),), + ); + output.push_null(); + } + } + }, + ), ); registry.register_combine_nullable_2_arg::( "json_extract_path_text", |_, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::>( - |s, path, output, _ctx| { - if s.is_empty() || path.is_empty() { - output.push_null(); - } else { - let value = jsonb::parse_value(s); - let json_paths = parse_json_path(path); - - match (value, json_paths) { - (Ok(value), Ok(json_paths)) => match value.get_by_path(&json_paths) { - Some(val) => { - let json_val = format!("{val}"); - output.push(json_val.as_bytes()); + |s, path, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match parse_value(s) { + Ok(val) => { + let mut buf = Vec::new(); + val.write_to_vec(&mut buf); + match parse_json_path(path) { + Ok(json_path) => { + let mut vals = get_by_path(&buf, json_path); + if vals.is_empty() { + output.push_null(); + } else if vals.len() == 1 { + let v = vals.remove(0); + let json_val = to_string(&v); + output.push(json_val.as_bytes()); + } else { + let mut array_val = Vec::new(); + let items: Vec<_> = vals.iter().map(|v| v.as_slice()).collect(); + build_array(items, &mut array_val).unwrap(); + let json_val = to_string(&array_val); + output.push(json_val.as_bytes()); + } } - None => output.push_null(), - }, - _ => output.push_null(), + Err(_) => { + ctx.set_error( + output.len(), + format!( + "Invalid JSON Path '{}'", + &String::from_utf8_lossy(path), + ), + ); + output.push_null(); + } + } + } + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + output.push_null(); } } }, @@ -272,70 +404,82 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_combine_nullable_1_arg::( "as_boolean", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, _| { - if v.is_empty() { - output.push_null(); - } else { - match as_bool(v) { - Some(val) => output.push(val), - None => output.push_null(), + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } + match as_bool(v) { + Some(val) => output.push(val), + None => output.push_null(), + } }), ); registry.register_combine_nullable_1_arg::( "as_integer", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, _| { - if v.is_empty() { - output.push_null(); - } else { - match as_i64(v) { - Some(val) => output.push(val), - None => output.push_null(), + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } + match as_i64(v) { + Some(val) => output.push(val), + None => output.push_null(), + } }), ); registry.register_combine_nullable_1_arg::( "as_float", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, _| { - if v.is_empty() { - output.push_null(); - } else { - match as_f64(v) { - Some(val) => output.push(val.into()), - None => output.push_null(), + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } + match as_f64(v) { + Some(val) => output.push(val.into()), + None => output.push_null(), + } }), ); registry.register_combine_nullable_1_arg::( "as_string", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, _| { - if v.is_empty() { - output.push_null(); - } else { - match as_str(v) { - Some(val) => output.push(val.as_bytes()), - None => output.push_null(), + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } + match as_str(v) { + Some(val) => output.push(val.as_bytes()), + None => output.push_null(), + } }), ); registry.register_combine_nullable_1_arg::( "as_array", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, _| { - if v.is_empty() { - output.push_null() - } else if is_array(v) { + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + if is_array(v) { output.push(v.as_bytes()); } else { output.push_null() @@ -346,10 +490,14 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_combine_nullable_1_arg::( "as_object", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|v, output, _| { - if v.is_empty() { - output.push_null() - } else if is_object(v) { + vectorize_with_builder_1_arg::>(|v, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + if is_object(v) { output.push(v.as_bytes()); } else { output.push_null() @@ -401,15 +549,17 @@ pub fn register(registry: &mut FunctionRegistry) { "to_boolean", |_| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::(|val, output, ctx| { - if val.is_empty() { - output.push(false); - } else { - match to_bool(val) { - Ok(value) => output.push(value), - Err(err) => { - ctx.set_error(output.len(), err.to_string()); - output.push(false); - } + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(false); + return; + } + } + match to_bool(val) { + Ok(value) => output.push(value), + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + output.push(false); } } }), @@ -418,46 +568,55 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_combine_nullable_1_arg::( "try_to_boolean", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|val, output, _| { - if val.is_empty() { - output.push_null(); - } else { + vectorize_with_builder_1_arg::>( + |val, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } match to_bool(val) { Ok(value) => { - output.validity.push(true); - output.builder.push(value); + output.push(value); } Err(_) => output.push_null(), } - } - }), + }, + ), ); registry.register_passthrough_nullable_1_arg::( "to_string", |_| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::(|val, output, ctx| { - if val.is_empty() { - output.commit_row(); - } else { - match to_str(val) { - Ok(value) => output.put_slice(value.as_bytes()), - Err(err) => { - ctx.set_error(output.len(), err.to_string()); - } + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.commit_row(); + return; + } + } + match to_str(val) { + Ok(value) => output.put_slice(value.as_bytes()), + Err(err) => { + ctx.set_error(output.len(), err.to_string()); } - output.commit_row(); } + output.commit_row(); }), ); registry.register_combine_nullable_1_arg::( "try_to_string", |_| FunctionDomain::Full, - vectorize_with_builder_1_arg::>(|val, output, _| { - if val.is_empty() { - output.push_null(); - } else { + vectorize_with_builder_1_arg::>( + |val, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } match to_str(val) { Ok(value) => { output.validity.push(true); @@ -466,23 +625,25 @@ pub fn register(registry: &mut FunctionRegistry) { } Err(_) => output.push_null(), } - } - }), + }, + ), ); registry.register_passthrough_nullable_1_arg::( "to_date", |_| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::(|val, output, ctx| { - if val.is_empty() { - output.push(0); - } else { - match as_str(val).and_then(|val| string_to_date(val.as_bytes(), ctx.tz.tz)) { - Some(d) => output.push(d.num_days_from_ce() - EPOCH_DAYS_FROM_CE), - None => { - ctx.set_error(output.len(), "unable to cast to type `DATE`"); - output.push(0); - } + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(0); + return; + } + } + match as_str(val).and_then(|val| string_to_date(val.as_bytes(), ctx.tz.tz)) { + Some(d) => output.push(d.num_days_from_ce() - EPOCH_DAYS_FROM_CE), + None => { + ctx.set_error(output.len(), "unable to cast to type `DATE`"); + output.push(0); } } }), @@ -492,16 +653,17 @@ pub fn register(registry: &mut FunctionRegistry) { "try_to_date", |_| FunctionDomain::Full, vectorize_with_builder_1_arg::>(|val, output, ctx| { - if val.is_empty() { - output.push_null(); - } else { - match as_str(val) - .and_then(|str_value| string_to_date(str_value.as_bytes(), ctx.tz.tz)) - { - Some(date) => output.push(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE), - None => output.push_null(), + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; } } + match as_str(val).and_then(|str_value| string_to_date(str_value.as_bytes(), ctx.tz.tz)) + { + Some(date) => output.push(date.num_days_from_ce() - EPOCH_DAYS_FROM_CE), + None => output.push_null(), + } }), ); @@ -509,15 +671,17 @@ pub fn register(registry: &mut FunctionRegistry) { "to_timestamp", |_| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::(|val, output, ctx| { - if val.is_empty() { - output.push(0); - } else { - match as_str(val).and_then(|val| string_to_timestamp(val.as_bytes(), ctx.tz.tz)) { - Some(ts) => output.push(ts.timestamp_micros()), - None => { - ctx.set_error(output.len(), "unable to cast to type `TIMESTAMP`"); - output.push(0); - } + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(0); + return; + } + } + match as_str(val).and_then(|val| string_to_timestamp(val.as_bytes(), ctx.tz.tz)) { + Some(ts) => output.push(ts.timestamp_micros()), + None => { + ctx.set_error(output.len(), "unable to cast to type `TIMESTAMP`"); + output.push(0); } } }), @@ -528,20 +692,22 @@ pub fn register(registry: &mut FunctionRegistry) { |_| FunctionDomain::Full, vectorize_with_builder_1_arg::>( |val, output, ctx| { - if val.is_empty() { - output.push_null(); - } else { - match as_str(val) { - Some(str_val) => { - let timestamp = string_to_timestamp(str_val.as_bytes(), ctx.tz.tz) - .map(|ts| ts.timestamp_micros()); - match timestamp { - Some(timestamp) => output.push(timestamp), - None => output.push_null(), - } + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + match as_str(val) { + Some(str_val) => { + let timestamp = string_to_timestamp(str_val.as_bytes(), ctx.tz.tz) + .map(|ts| ts.timestamp_micros()); + match timestamp { + Some(timestamp) => output.push(timestamp), + None => output.push_null(), } - None => output.push_null(), } + None => output.push_null(), } }, ), @@ -557,27 +723,29 @@ pub fn register(registry: &mut FunctionRegistry) { |_| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::>( move |val, output, ctx| { - if val.is_empty() { - output.push(NUM_TYPE::default()); - } else { - type Native = ::Native; + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push(NUM_TYPE::default()); + return; + } + } + type Native = ::Native; - let value: Option = if dest_type.is_float() { - to_f64(val).ok().and_then(num_traits::cast::cast) - } else if dest_type.is_signed() { - to_i64(val).ok().and_then(num_traits::cast::cast) - } else { - to_u64(val).ok().and_then(num_traits::cast::cast) - }; - match value { - Some(value) => output.push(value.into()), - None => { - ctx.set_error( - output.len(), - format!("unable to cast to type {dest_type}",), - ); - output.push(NUM_TYPE::default()); - } + let value: Option = if dest_type.is_float() { + to_f64(val).ok().and_then(num_traits::cast::cast) + } else if dest_type.is_signed() { + to_i64(val).ok().and_then(num_traits::cast::cast) + } else { + to_u64(val).ok().and_then(num_traits::cast::cast) + }; + match value { + Some(value) => output.push(value.into()), + None => { + ctx.set_error( + output.len(), + format!("unable to cast to type {dest_type}",), + ); + output.push(NUM_TYPE::default()); } } }, @@ -592,10 +760,14 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_with_builder_1_arg::< VariantType, NullableType>, - >(move |val, output, _| { - if val.is_empty() { - output.push_null(); - } else if dest_type.is_float() { + >(move |val, output, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(output.len()) { + output.push_null(); + return; + } + } + if dest_type.is_float() { if let Ok(value) = to_f64(val) { if let Some(new_value) = num_traits::cast::cast(value) { output.push(new_value); diff --git a/src/query/functions/src/srfs/mod.rs b/src/query/functions/src/srfs/mod.rs index 63928e8d6cf74..190cc7c73b532 100644 --- a/src/query/functions/src/srfs/mod.rs +++ b/src/query/functions/src/srfs/mod.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use common_expression::types::nullable::NullableColumn; +use common_expression::types::string::StringColumnBuilder; use common_expression::types::DataType; use common_expression::Column; use common_expression::Function; @@ -26,6 +27,8 @@ use common_expression::FunctionSignature; use common_expression::Scalar; use common_expression::ScalarRef; use common_expression::Value; +use jsonb::get_by_path; +use jsonb::jsonpath::parse_json_path; pub fn register(registry: &mut FunctionRegistry) { registry.properties.insert( @@ -50,6 +53,69 @@ pub fn register(registry: &mut FunctionRegistry) { } } }); + + registry.properties.insert( + "json_path_query".to_string(), + FunctionProperty::default().kind(FunctionKind::SRF), + ); + + registry.register_function_factory("json_path_query", |_, args_type| { + if args_type.len() != 2 { + return None; + } + if args_type[0].remove_nullable() != DataType::Variant + || args_type[1].remove_nullable() != DataType::String + { + return None; + } + + Some(Arc::new(Function { + signature: FunctionSignature { + name: "json_path_query".to_string(), + args_type: args_type.to_vec(), + return_type: DataType::Tuple(vec![DataType::Variant]), + }, + + eval: FunctionEval::SRF { + eval: Box::new(|args, ctx| { + let val_arg = args[0].clone().to_owned(); + let path_arg = args[1].clone().to_owned(); + (0..ctx.num_rows) + .map(|row| { + let val = val_arg.index(row).unwrap(); + let path = path_arg.index(row).unwrap(); + let mut builder = StringColumnBuilder::with_capacity(0, 0); + if let ScalarRef::String(path) = path { + match parse_json_path(path) { + Ok(json_path) => { + if let ScalarRef::Variant(val) = val { + let vals = get_by_path(val, json_path); + for val in vals { + builder.put(&val); + builder.commit_row(); + } + } + } + Err(_) => { + ctx.set_error( + 0, + format!( + "Invalid JSON Path '{}'", + &String::from_utf8_lossy(path), + ), + ); + } + } + } + let array = Column::Variant(builder.build()); + let array_len = array.len(); + (Value::Column(Column::Tuple(vec![array])), array_len) + }) + .collect() + }), + }, + })) + }); } fn build_unnest( @@ -65,8 +131,8 @@ fn build_unnest( return_type: DataType::Tuple(vec![DataType::Null]), }, eval: FunctionEval::SRF { - eval: Box::new(|_, num_rows| { - vec![(Value::Scalar(Scalar::Tuple(vec![Scalar::Null])), 0); num_rows] + eval: Box::new(|_, ctx| { + vec![(Value::Scalar(Scalar::Tuple(vec![Scalar::Null])), 0); ctx.num_rows] }), }, }) @@ -92,9 +158,9 @@ fn build_unnest( ))]), }, eval: FunctionEval::SRF { - eval: Box::new(|args, num_rows| { + eval: Box::new(|args, ctx| { let arg = args[0].clone().to_owned(); - (0..num_rows) + (0..ctx.num_rows) .map(|row| { fn unnest_column(col: Column) -> Column { match col { diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index 9dfc3e4ecfbc0..5a482f6c6fb11 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -1605,20 +1605,24 @@ Functions overloads: 1 geohash_encode(Float64 NULL, Float64 NULL) :: String NULL 2 geohash_encode(Float64, Float64, UInt8) :: String 3 geohash_encode(Float64 NULL, Float64 NULL, UInt8 NULL) :: String NULL -0 get(Variant NULL, String NULL) :: Variant NULL -1 get(Variant NULL, UInt64 NULL) :: Variant NULL -2 get(Array(Nothing) NULL, UInt64 NULL) :: NULL -3 get(Array(NULL) NULL, UInt64 NULL) :: NULL -4 get(Array(T0 NULL), UInt64) :: T0 NULL -5 get(Array(T0 NULL) NULL, UInt64 NULL) :: T0 NULL -6 get(Map(Nothing) NULL, T0 NULL) :: NULL -7 get(Map(T0, T1), T0) :: T1 NULL -8 get(Map(T0, T1) NULL, T0 NULL) :: T1 NULL -9 get FACTORY -10 get FACTORY +0 get(Variant, String) :: Variant NULL +1 get(Variant NULL, String NULL) :: Variant NULL +2 get(Variant, Int64) :: Variant NULL +3 get(Variant NULL, Int64 NULL) :: Variant NULL +4 get(Array(Nothing) NULL, UInt64 NULL) :: NULL +5 get(Array(NULL) NULL, UInt64 NULL) :: NULL +6 get(Array(T0 NULL), UInt64) :: T0 NULL +7 get(Array(T0 NULL) NULL, UInt64 NULL) :: T0 NULL +8 get(Map(Nothing) NULL, T0 NULL) :: NULL +9 get(Map(T0, T1), T0) :: T1 NULL +10 get(Map(T0, T1) NULL, T0 NULL) :: T1 NULL 11 get FACTORY -0 get_ignore_case(Variant NULL, String NULL) :: Variant NULL -0 get_path(Variant NULL, String NULL) :: Variant NULL +12 get FACTORY +13 get FACTORY +0 get_ignore_case(Variant, String) :: Variant NULL +1 get_ignore_case(Variant NULL, String NULL) :: Variant NULL +0 get_path(Variant, String) :: Variant NULL +1 get_path(Variant NULL, String NULL) :: Variant NULL 0 great_circle_angle(Float64, Float64, Float64, Float64) :: Float32 1 great_circle_angle(Float64 NULL, Float64 NULL, Float64 NULL, Float64 NULL) :: Float32 NULL 0 great_circle_distance(Float64, Float64, Float64, Float64) :: Float32 @@ -1723,6 +1727,11 @@ Functions overloads: 0 json_object FACTORY 0 json_object_keep_null FACTORY 0 json_object_keys(Variant NULL) :: Variant NULL +0 json_path_query FACTORY +0 json_path_query_array(Variant, String) :: Variant NULL +1 json_path_query_array(Variant NULL, String NULL) :: Variant NULL +0 json_path_query_first(Variant, String) :: Variant NULL +1 json_path_query_first(Variant NULL, String NULL) :: Variant NULL 0 left(String, UInt64) :: String 1 left(String NULL, UInt64 NULL) :: String NULL 0 length(Variant NULL) :: UInt32 NULL diff --git a/src/query/functions/tests/it/scalars/testdata/variant.txt b/src/query/functions/tests/it/scalars/testdata/variant.txt index 17871aa71808b..04e0d3cb97b1e 100644 --- a/src/query/functions/tests/it/scalars/testdata/variant.txt +++ b/src/query/functions/tests/it/scalars/testdata/variant.txt @@ -24,13 +24,12 @@ output domain : Undefined output : null -ast : parse_json(' ') -raw expr : parse_json(" \t") -checked expr : parse_json(" \t") -optimized expr : 0x2000000000000000 -output type : Variant -output domain : Undefined -output : null +error: + --> SQL:1:1 + | +1 | parse_json(' ') + | ^^^^^^^^^^^^^^^^^^ EOF while parsing a value, pos 2 while evaluating function `parse_json(" \t")` + ast : parse_json('true') @@ -131,12 +130,12 @@ evaluation: | Row 3 | "1234" | 1234 | +--------+------------------------+--------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x7472756566616c736531323334, offsets: [0, 4, 9, 9, 13] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x20000000400000002000000030000000200000000000000020000000200000035004d2, offsets: [0, 8, 16, 24, 35] }, validity: [0b____1011] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: 0x7472756566616c736531323334, offsets: [0, 4, 9, 9, 13] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: 0x2000000040000000200000003000000020000000200000035004d2, offsets: [0, 8, 16, 16, 27] }, validity: [0b____1011] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ ast : try_parse_json(NULL) @@ -264,12 +263,12 @@ evaluation: | Row 3 | "1234" | 1234 | +--------+-----------------------+--------------------+ evaluation (internal): -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | NullableColumn { column: StringColumn { data: 0x7472756574747431323334, offsets: [0, 4, 7, 7, 11] }, validity: [0b____1011] } | -| Output | NullableColumn { column: StringColumn { data: 0x2000000040000000200000000000000020000000200000035004d2, offsets: [0, 8, 8, 16, 27] }, validity: [0b____1001] } | -+--------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: 0x7472756574747431323334, offsets: [0, 4, 7, 7, 11] }, validity: [0b____1011] } | +| Output | NullableColumn { column: StringColumn { data: 0x200000004000000020000000200000035004d2, offsets: [0, 8, 8, 8, 19] }, validity: [0b____1001] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ ast : check_json(NULL) @@ -481,7 +480,7 @@ evaluation (internal): ast : parse_json('null')[1] raw expr : get(parse_json("null"), 1) -checked expr : get(CAST(parse_json("null") AS Variant NULL), CAST(1_u8 AS UInt64 NULL)) +checked expr : get(parse_json("null"), to_int64(1_u8)) optimized expr : NULL output type : Variant NULL output domain : {NULL} @@ -490,7 +489,7 @@ output : NULL ast : parse_json('null')['k'] raw expr : get(parse_json("null"), "k") -checked expr : get(CAST(parse_json("null") AS Variant NULL), CAST("k" AS String NULL)) +checked expr : get(parse_json("null"), "k") optimized expr : NULL output type : Variant NULL output domain : {NULL} @@ -499,7 +498,7 @@ output : NULL ast : parse_json('[1,2,3,4]')[1] raw expr : get(parse_json("[1,2,3,4]"), 1) -checked expr : get(CAST(parse_json("[1,2,3,4]") AS Variant NULL), CAST(1_u8 AS UInt64 NULL)) +checked expr : get(parse_json("[1,2,3,4]"), to_int64(1_u8)) optimized expr : 0x20000000200000025002 output type : Variant NULL output domain : Undefined @@ -508,7 +507,7 @@ output : 2 ast : parse_json('[1,2,3,4]')[2+3] raw expr : get(parse_json("[1,2,3,4]"), plus(2, 3)) -checked expr : get(CAST(parse_json("[1,2,3,4]") AS Variant NULL), CAST(plus(2_u8, 3_u8) AS UInt64 NULL)) +checked expr : get(parse_json("[1,2,3,4]"), to_int64(plus(2_u8, 3_u8))) optimized expr : NULL output type : Variant NULL output domain : {NULL} @@ -517,7 +516,7 @@ output : NULL ast : parse_json('{"k":"v"}')['k'] raw expr : get(parse_json("{\"k\":\"v\"}"), "k") -checked expr : get(CAST(parse_json("{\"k\":\"v\"}") AS Variant NULL), CAST("k" AS String NULL)) +checked expr : get(parse_json("{\"k\":\"v\"}"), "k") optimized expr : 0x200000001000000176 output type : Variant NULL output domain : Undefined @@ -526,7 +525,7 @@ output : "v" ast : parse_json('{"k":"v"}')['x'] raw expr : get(parse_json("{\"k\":\"v\"}"), "x") -checked expr : get(CAST(parse_json("{\"k\":\"v\"}") AS Variant NULL), CAST("x" AS String NULL)) +checked expr : get(parse_json("{\"k\":\"v\"}"), "x") optimized expr : NULL output type : Variant NULL output domain : {NULL} @@ -535,7 +534,7 @@ output : NULL ast : CAST(('a', 'b') AS VARIANT)['2'] raw expr : get(CAST(tuple("a", "b") AS Variant), "2") -checked expr : get(CAST(to_variant(tuple("a", "b")) AS Variant NULL), CAST("2" AS String NULL)) +checked expr : get(to_variant(tuple("a", "b")), "2") optimized expr : 0x200000001000000162 output type : Variant NULL output domain : Undefined @@ -544,7 +543,7 @@ output : "b" ast : parse_json(s)[i] raw expr : get(parse_json(s::String), i::UInt64) -checked expr : get(CAST(parse_json(s) AS Variant NULL), CAST(i AS UInt64 NULL)) +checked expr : get(parse_json(s), to_int64(i)) evaluation: +--------+----------------------------------+---------+--------------+ | | s | i | Output | @@ -567,7 +566,7 @@ evaluation (internal): ast : parse_json(s)[i] raw expr : get(parse_json(s::String NULL), i::UInt64 NULL) -checked expr : get(parse_json(s), i) +checked expr : get(parse_json(s), CAST(i AS Int64 NULL)) evaluation: +--------+------------------------+------------------+--------------+ | | s | i | Output | @@ -591,7 +590,7 @@ evaluation (internal): ast : parse_json(s)[k] raw expr : get(parse_json(s::String), k::String) -checked expr : get(CAST(parse_json(s) AS Variant NULL), CAST(k AS String NULL)) +checked expr : get(parse_json(s), k) evaluation: +--------+------------------------+-------------+--------------+ | | s | k | Output | @@ -638,7 +637,7 @@ evaluation (internal): ast : get_ignore_case(parse_json('{"Aa":1, "aA":2, "aa":3}'), 'AA') raw expr : get_ignore_case(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}"), "AA") -checked expr : get_ignore_case(CAST(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}") AS Variant NULL), CAST("AA" AS String NULL)) +checked expr : get_ignore_case(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}"), "AA") optimized expr : 0x20000000200000025001 output type : Variant NULL output domain : Undefined @@ -647,7 +646,7 @@ output : 1 ast : get_ignore_case(parse_json('{"Aa":1, "aA":2, "aa":3}'), 'aa') raw expr : get_ignore_case(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}"), "aa") -checked expr : get_ignore_case(CAST(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}") AS Variant NULL), CAST("aa" AS String NULL)) +checked expr : get_ignore_case(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}"), "aa") optimized expr : 0x20000000200000025003 output type : Variant NULL output domain : Undefined @@ -656,7 +655,7 @@ output : 3 ast : get_ignore_case(parse_json('{"Aa":1, "aA":2, "aa":3}'), 'bb') raw expr : get_ignore_case(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}"), "bb") -checked expr : get_ignore_case(CAST(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}") AS Variant NULL), CAST("bb" AS String NULL)) +checked expr : get_ignore_case(parse_json("{\"Aa\":1, \"aA\":2, \"aa\":3}"), "bb") optimized expr : NULL output type : Variant NULL output domain : {NULL} @@ -665,7 +664,7 @@ output : NULL ast : get_ignore_case(parse_json(s), k) raw expr : get_ignore_case(parse_json(s::String), k::String) -checked expr : get_ignore_case(CAST(parse_json(s) AS Variant NULL), CAST(k AS String NULL)) +checked expr : get_ignore_case(parse_json(s), k) evaluation: +--------+------------------------+-------------+--------------+ | | s | k | Output | @@ -712,7 +711,7 @@ evaluation (internal): ast : get_path(parse_json('[[1,2],3]'), '[0]') raw expr : get_path(parse_json("[[1,2],3]"), "[0]") -checked expr : get_path(CAST(parse_json("[[1,2],3]") AS Variant NULL), CAST("[0]" AS String NULL)) +checked expr : get_path(parse_json("[[1,2],3]"), "[0]") optimized expr : 0x80000002200000022000000250015002 output type : Variant NULL output domain : Undefined @@ -721,7 +720,7 @@ output : [1,2] ast : get_path(parse_json('[[1,2],3]'), '[0][1]') raw expr : get_path(parse_json("[[1,2],3]"), "[0][1]") -checked expr : get_path(CAST(parse_json("[[1,2],3]") AS Variant NULL), CAST("[0][1]" AS String NULL)) +checked expr : get_path(parse_json("[[1,2],3]"), "[0][1]") optimized expr : 0x20000000200000025002 output type : Variant NULL output domain : Undefined @@ -730,7 +729,7 @@ output : 2 ast : get_path(parse_json('[1,2,3]'), '[0]') raw expr : get_path(parse_json("[1,2,3]"), "[0]") -checked expr : get_path(CAST(parse_json("[1,2,3]") AS Variant NULL), CAST("[0]" AS String NULL)) +checked expr : get_path(parse_json("[1,2,3]"), "[0]") optimized expr : 0x20000000200000025001 output type : Variant NULL output domain : Undefined @@ -739,7 +738,7 @@ output : 1 ast : get_path(parse_json('[1,2,3]'), 'k2:k3') raw expr : get_path(parse_json("[1,2,3]"), "k2:k3") -checked expr : get_path(CAST(parse_json("[1,2,3]") AS Variant NULL), CAST("k2:k3" AS String NULL)) +checked expr : get_path(parse_json("[1,2,3]"), "k2:k3") optimized expr : NULL output type : Variant NULL output domain : {NULL} @@ -748,7 +747,7 @@ output : NULL ast : get_path(parse_json('{"a":{"b":2}}'), '["a"]["b"]') raw expr : get_path(parse_json("{\"a\":{\"b\":2}}"), "[\"a\"][\"b\"]") -checked expr : get_path(CAST(parse_json("{\"a\":{\"b\":2}}") AS Variant NULL), CAST("[\"a\"][\"b\"]" AS String NULL)) +checked expr : get_path(parse_json("{\"a\":{\"b\":2}}"), "[\"a\"][\"b\"]") optimized expr : 0x20000000200000025002 output type : Variant NULL output domain : Undefined @@ -757,7 +756,7 @@ output : 2 ast : get_path(parse_json('{"a":{"b":2}}'), 'a:b') raw expr : get_path(parse_json("{\"a\":{\"b\":2}}"), "a:b") -checked expr : get_path(CAST(parse_json("{\"a\":{\"b\":2}}") AS Variant NULL), CAST("a:b" AS String NULL)) +checked expr : get_path(parse_json("{\"a\":{\"b\":2}}"), "a:b") optimized expr : 0x20000000200000025002 output type : Variant NULL output domain : Undefined @@ -766,7 +765,7 @@ output : 2 ast : get_path(parse_json('{"a":{"b":2}}'), '["a"]') raw expr : get_path(parse_json("{\"a\":{\"b\":2}}"), "[\"a\"]") -checked expr : get_path(CAST(parse_json("{\"a\":{\"b\":2}}") AS Variant NULL), CAST("[\"a\"]" AS String NULL)) +checked expr : get_path(parse_json("{\"a\":{\"b\":2}}"), "[\"a\"]") optimized expr : 0x400000011000000120000002625002 output type : Variant NULL output domain : Undefined @@ -775,7 +774,7 @@ output : {"b":2} ast : get_path(parse_json('{"a":{"b":2}}'), 'a') raw expr : get_path(parse_json("{\"a\":{\"b\":2}}"), "a") -checked expr : get_path(CAST(parse_json("{\"a\":{\"b\":2}}") AS Variant NULL), CAST("a" AS String NULL)) +checked expr : get_path(parse_json("{\"a\":{\"b\":2}}"), "a") optimized expr : 0x400000011000000120000002625002 output type : Variant NULL output domain : Undefined @@ -784,7 +783,7 @@ output : {"b":2} ast : get_path(parse_json(s), k) raw expr : get_path(parse_json(s::String), k::String) -checked expr : get_path(CAST(parse_json(s) AS Variant NULL), CAST(k AS String NULL)) +checked expr : get_path(parse_json(s), k) evaluation: +--------+---------------------------------+-------------------+--------------+ | | s | k | Output | @@ -2151,3 +2150,159 @@ evaluation (internal): +--------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +ast : json_path_query_array(parse_json('[1, 2, 3, 4, 5, 6]'), '$[0, 2 to last, 4]') +raw expr : json_path_query_array(parse_json("[1, 2, 3, 4, 5, 6]"), "$[0, 2 to last, 4]") +checked expr : json_path_query_array(parse_json("[1, 2, 3, 4, 5, 6]"), "$[0, 2 to last, 4]") +optimized expr : 0x80000006200000022000000220000002200000022000000220000002500150035004500550065005 +output type : Variant NULL +output domain : Undefined +output : [1,3,4,5,6,5] + + +ast : json_path_query_array(parse_json('[1, 2, 3, 4, 5, 6]'), '$[100]') +raw expr : json_path_query_array(parse_json("[1, 2, 3, 4, 5, 6]"), "$[100]") +checked expr : json_path_query_array(parse_json("[1, 2, 3, 4, 5, 6]"), "$[100]") +optimized expr : 0x80000000 +output type : Variant NULL +output domain : Undefined +output : [] + + +ast : json_path_query_array(parse_json('[{"a": 1}, {"a": 2}]'), '$[*].a') +raw expr : json_path_query_array(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a") +checked expr : json_path_query_array(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a") +optimized expr : 0x80000002200000022000000250015002 +output type : Variant NULL +output domain : Undefined +output : [1,2] + + +ast : json_path_query_array(parse_json('[{"a": 1}, {"a": 2}]'), '$[*].a ? (@ == 1)') +raw expr : json_path_query_array(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a ? (@ == 1)") +checked expr : json_path_query_array(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a ? (@ == 1)") +optimized expr : 0x80000001200000025001 +output type : Variant NULL +output domain : Undefined +output : [1] + + +ast : json_path_query_array(parse_json('[{"a": 1}, {"a": 2}]'), '$[*].a ? (@ > 10)') +raw expr : json_path_query_array(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a ? (@ > 10)") +checked expr : json_path_query_array(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a ? (@ > 10)") +optimized expr : 0x80000000 +output type : Variant NULL +output domain : Undefined +output : [] + + +ast : json_path_query_array(parse_json('[{"a": {"b":10}}, {"a": 2}]'), '$[*].a.b') +raw expr : json_path_query_array(parse_json("[{\"a\": {\"b\":10}}, {\"a\": 2}]"), "$[*].a.b") +checked expr : json_path_query_array(parse_json("[{\"a\": {\"b\":10}}, {\"a\": 2}]"), "$[*].a.b") +optimized expr : 0x8000000120000002500a +output type : Variant NULL +output domain : Undefined +output : [10] + + +ast : json_path_query_array(parse_json(s), p) +raw expr : json_path_query_array(parse_json(s::String NULL), p::String) +checked expr : json_path_query_array(parse_json(s), CAST(p AS String NULL)) +evaluation: ++--------+-------------------------+------------------+--------------+ +| | s | p | Output | ++--------+-------------------------+------------------+--------------+ +| Type | String NULL | String | Variant NULL | +| Domain | {""..="true"} ∪ {NULL} | {"$.a"..="$[0]"} | Unknown | +| Row 0 | "true" | "$[0]" | [] | +| Row 1 | "[{\"k\":1},{\"k\":2}]" | "$[*].k" | [1,2] | +| Row 2 | NULL | "$.a" | NULL | +| Row 3 | "[1,2,3,4]" | "$[0,2]" | [1,3] | ++--------+-------------------------+------------------+--------------+ +evaluation (internal): ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: 0x747275655b7b226b223a317d2c7b226b223a327d5d5b312c322c332c345d, offsets: [0, 4, 21, 21, 30] }, validity: [0b____1011] } | +| p | StringColumn { data: 0x245b305d245b2a5d2e6b242e61245b302c325d, offsets: [0, 4, 10, 13, 19] } | +| Output | NullableColumn { column: StringColumn { data: 0x800000008000000220000002200000025001500280000002200000022000000250015003, offsets: [0, 4, 20, 20, 36] }, validity: [0b____1011] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : json_path_query_first(parse_json('[1, 2, 3, 4, 5, 6]'), '$[0, 2 to last, 4]') +raw expr : json_path_query_first(parse_json("[1, 2, 3, 4, 5, 6]"), "$[0, 2 to last, 4]") +checked expr : json_path_query_first(parse_json("[1, 2, 3, 4, 5, 6]"), "$[0, 2 to last, 4]") +optimized expr : 0x20000000200000025001 +output type : Variant NULL +output domain : Undefined +output : 1 + + +ast : json_path_query_first(parse_json('[1, 2, 3, 4, 5, 6]'), '$[100]') +raw expr : json_path_query_first(parse_json("[1, 2, 3, 4, 5, 6]"), "$[100]") +checked expr : json_path_query_first(parse_json("[1, 2, 3, 4, 5, 6]"), "$[100]") +optimized expr : NULL +output type : Variant NULL +output domain : {NULL} +output : NULL + + +ast : json_path_query_first(parse_json('[{"a": 1}, {"a": 2}]'), '$[*].a') +raw expr : json_path_query_first(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a") +checked expr : json_path_query_first(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a") +optimized expr : 0x20000000200000025001 +output type : Variant NULL +output domain : Undefined +output : 1 + + +ast : json_path_query_first(parse_json('[{"a": 1}, {"a": 2}]'), '$[*].a ? (@ == 1)') +raw expr : json_path_query_first(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a ? (@ == 1)") +checked expr : json_path_query_first(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a ? (@ == 1)") +optimized expr : 0x20000000200000025001 +output type : Variant NULL +output domain : Undefined +output : 1 + + +ast : json_path_query_first(parse_json('[{"a": 1}, {"a": 2}]'), '$[*].a ? (@ > 10)') +raw expr : json_path_query_first(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a ? (@ > 10)") +checked expr : json_path_query_first(parse_json("[{\"a\": 1}, {\"a\": 2}]"), "$[*].a ? (@ > 10)") +optimized expr : NULL +output type : Variant NULL +output domain : {NULL} +output : NULL + + +ast : json_path_query_first(parse_json('[{"a": {"b":10}}, {"a": 2}]'), '$[*].a.b') +raw expr : json_path_query_first(parse_json("[{\"a\": {\"b\":10}}, {\"a\": 2}]"), "$[*].a.b") +checked expr : json_path_query_first(parse_json("[{\"a\": {\"b\":10}}, {\"a\": 2}]"), "$[*].a.b") +optimized expr : 0x2000000020000002500a +output type : Variant NULL +output domain : Undefined +output : 10 + + +ast : json_path_query_first(parse_json(s), p) +raw expr : json_path_query_first(parse_json(s::String NULL), p::String) +checked expr : json_path_query_first(parse_json(s), CAST(p AS String NULL)) +evaluation: ++--------+-------------------------+------------------+--------------+ +| | s | p | Output | ++--------+-------------------------+------------------+--------------+ +| Type | String NULL | String | Variant NULL | +| Domain | {""..="true"} ∪ {NULL} | {"$.a"..="$[0]"} | Unknown | +| Row 0 | "true" | "$[0]" | NULL | +| Row 1 | "[{\"k\":1},{\"k\":2}]" | "$[*].k" | 1 | +| Row 2 | NULL | "$.a" | NULL | +| Row 3 | "[1,2,3,4]" | "$[0,2]" | 1 | ++--------+-------------------------+------------------+--------------+ +evaluation (internal): ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | NullableColumn { column: StringColumn { data: 0x747275655b7b226b223a317d2c7b226b223a327d5d5b312c322c332c345d, offsets: [0, 4, 21, 21, 30] }, validity: [0b____1011] } | +| p | StringColumn { data: 0x245b305d245b2a5d2e6b242e61245b302c325d, offsets: [0, 4, 10, 13, 19] } | +| Output | NullableColumn { column: StringColumn { data: 0x2000000020000002500120000000200000025001, offsets: [0, 0, 10, 10, 20] }, validity: [0b____1010] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + diff --git a/src/query/functions/tests/it/scalars/variant.rs b/src/query/functions/tests/it/scalars/variant.rs index 2974a45aabc1b..750811825c06d 100644 --- a/src/query/functions/tests/it/scalars/variant.rs +++ b/src/query/functions/tests/it/scalars/variant.rs @@ -39,6 +39,8 @@ fn test_variant() { test_try_to_type(file); test_json_object(file); test_json_object_keep_null(file); + test_json_path_query_array(file); + test_json_path_query_first(file); } fn test_parse_json(file: &mut impl Write) { @@ -583,3 +585,97 @@ fn test_json_object_keep_null(file: &mut impl Write) { ), ]); } + +fn test_json_path_query_array(file: &mut impl Write) { + run_ast( + file, + "json_path_query_array(parse_json('[1, 2, 3, 4, 5, 6]'), '$[0, 2 to last, 4]')", + &[], + ); + run_ast( + file, + "json_path_query_array(parse_json('[1, 2, 3, 4, 5, 6]'), '$[100]')", + &[], + ); + run_ast( + file, + "json_path_query_array(parse_json('[{\"a\": 1}, {\"a\": 2}]'), '$[*].a')", + &[], + ); + run_ast( + file, + "json_path_query_array(parse_json('[{\"a\": 1}, {\"a\": 2}]'), '$[*].a ? (@ == 1)')", + &[], + ); + run_ast( + file, + "json_path_query_array(parse_json('[{\"a\": 1}, {\"a\": 2}]'), '$[*].a ? (@ > 10)')", + &[], + ); + run_ast( + file, + "json_path_query_array(parse_json('[{\"a\": {\"b\":10}}, {\"a\": 2}]'), '$[*].a.b')", + &[], + ); + + run_ast(file, "json_path_query_array(parse_json(s), p)", &[ + ( + "s", + StringType::from_data_with_validity( + &["true", "[{\"k\":1},{\"k\":2}]", "", "[1,2,3,4]"], + vec![true, true, false, true], + ), + ), + ( + "p", + StringType::from_data(vec!["$[0]", "$[*].k", "$.a", "$[0,2]"]), + ), + ]); +} + +fn test_json_path_query_first(file: &mut impl Write) { + run_ast( + file, + "json_path_query_first(parse_json('[1, 2, 3, 4, 5, 6]'), '$[0, 2 to last, 4]')", + &[], + ); + run_ast( + file, + "json_path_query_first(parse_json('[1, 2, 3, 4, 5, 6]'), '$[100]')", + &[], + ); + run_ast( + file, + "json_path_query_first(parse_json('[{\"a\": 1}, {\"a\": 2}]'), '$[*].a')", + &[], + ); + run_ast( + file, + "json_path_query_first(parse_json('[{\"a\": 1}, {\"a\": 2}]'), '$[*].a ? (@ == 1)')", + &[], + ); + run_ast( + file, + "json_path_query_first(parse_json('[{\"a\": 1}, {\"a\": 2}]'), '$[*].a ? (@ > 10)')", + &[], + ); + run_ast( + file, + "json_path_query_first(parse_json('[{\"a\": {\"b\":10}}, {\"a\": 2}]'), '$[*].a.b')", + &[], + ); + + run_ast(file, "json_path_query_first(parse_json(s), p)", &[ + ( + "s", + StringType::from_data_with_validity( + &["true", "[{\"k\":1},{\"k\":2}]", "", "[1,2,3,4]"], + vec![true, true, false, true], + ), + ), + ( + "p", + StringType::from_data(vec!["$[0]", "$[*].k", "$.a", "$[0,2]"]), + ), + ]); +} diff --git a/tests/sqllogictests/suites/query/02_function/02_0051_function_semi_structureds_get b/tests/sqllogictests/suites/query/02_function/02_0051_function_semi_structureds_get index 277c8b3dc90ce..fa33cdd19dc10 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0051_function_semi_structureds_get +++ b/tests/sqllogictests/suites/query/02_function/02_0051_function_semi_structureds_get @@ -292,6 +292,126 @@ select id, get(arr, 5) from t4 statement error 1001 select id, get(arr, 'a') from t4 +query IT +select id, json_path_query(arr, '$[2, 1 to last -1]') from t1 +---- +1 3 +1 2 +1 3 + +query IT +select id, json_path_query(arr, '$[*]?(@ > 1 && @ <= 3)') from t1 +---- +1 2 +1 3 + +query IT +select id, json_path_query(arr, '$[*][1]') from t1 +---- +1 "b" + +query IT +select id, json_path_query(obj, '$.a') from t2 +---- +1 1 + +query IT +select id, json_path_query(obj, '$.a') from t2 +---- +1 1 + +query IT +select id, json_path_query(obj, '$.b.c') from t2 +---- +1 2 + +query IT +select id, json_path_query(obj, '$.b?(@.c == 2)') from t2 +---- +1 {"c":2} + +query IT +select id, json_path_query(obj, '$.b?(@.c > 2)') from t2 +---- + +statement error 1001 +select id, json_path_query(obj, '--') from t2 + +query IT +select id, json_path_query_array(arr, '$[2, 1 to last -1]') from t1 +---- +1 [3,2,3] + +query IT +select id, json_path_query_array(arr, '$[*]?(@ > 1 && @ <= 3)') from t1 +---- +1 [2,3] + +query IT +select id, json_path_query_array(arr, '$[*][1]') from t1 +---- +1 ["b"] + +query IT +select id, json_path_query_array(obj, '$.a') from t2 +---- +1 [1] + +query IT +select id, json_path_query_array(obj, '$.b.c') from t2 +---- +1 [2] + +query IT +select id, json_path_query_array(obj, '$.b?(@.c == 2)') from t2 +---- +1 [{"c":2}] + +query IT +select id, json_path_query_array(obj, '$.b?(@.c > 2)') from t2 +---- +1 [] + +statement error 1001 +select id, json_path_query_array(obj, '--') from t2 + +query IT +select id, json_path_query_first(arr, '$[2, 1 to last -1]') from t1 +---- +1 3 + +query IT +select id, json_path_query_first(arr, '$[*]?(@ > 1 && @ <= 3)') from t1 +---- +1 2 + +query IT +select id, json_path_query_first(arr, '$[*][1]') from t1 +---- +1 "b" + +query IT +select id, json_path_query_first(obj, '$.a') from t2 +---- +1 1 + +query IT +select id, json_path_query_first(obj, '$.b.c') from t2 +---- +1 2 + +query IT +select id, json_path_query_first(obj, '$.b?(@.c == 2)') from t2 +---- +1 {"c":2} + +query IT +select id, json_path_query_first(obj, '$.b?(@.c > 2)') from t2 +---- +1 NULL + +statement error 1001 +select id, json_path_query_first(obj, '--') from t2 + statement ok DROP DATABASE IF EXISTS db1 -