diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index e805f4212a1c4..30a0a48aabf67 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -85,6 +85,7 @@ set(SRC_FILES
     llvm_generator.cc
     llvm_types.cc
     like_holder.cc
+    json_holder.cc
     literal_holder.cc
     projector.cc
     regex_util.cc
@@ -233,6 +234,7 @@ add_gandiva_test(internals-test
                  lru_cache_test.cc
                  to_date_holder_test.cc
                  simple_arena_test.cc
+                 json_holder_test.cc
                  like_holder_test.cc
                  replace_holder_test.cc
                  decimal_type_util_test.cc
diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h
index ced1538915dd5..5c2d978b3921a 100644
--- a/cpp/src/gandiva/function_holder_registry.h
+++ b/cpp/src/gandiva/function_holder_registry.h
@@ -25,6 +25,7 @@
 
 #include "arrow/status.h"
 #include "gandiva/function_holder.h"
+#include "gandiva/json_holder.h"
 #include "gandiva/like_holder.h"
 #include "gandiva/node.h"
 #include "gandiva/random_generator_holder.h"
@@ -64,6 +65,7 @@ class FunctionHolderRegistry {
     static map_type maker_map = {
         {"like", LAMBDA_MAKER(LikeHolder)},
         {"ilike", LAMBDA_MAKER(LikeHolder)},
+        {"get_json_object", LAMBDA_MAKER(JsonHolder)},
         {"to_date", LAMBDA_MAKER(ToDateHolder)},
         {"random", LAMBDA_MAKER(RandomGeneratorHolder)},
         {"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index b4a3157520ddb..0d29a23437101 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -260,6 +260,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("ilike", {}, DataTypeVector{utf8(), utf8()}, boolean(),
                      kResultNullIfNull, "gdv_fn_ilike_utf8_utf8",
                      NativeFunction::kNeedsFunctionHolder),
+
+      NativeFunction("get_json_object", {}, DataTypeVector{utf8(), utf8()}, utf8(),
+                     kResultNullIfNull, "gdv_fn_get_json_object_utf8_utf8",
+                     NativeFunction::kNeedsFunctionHolder),
 
       NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(), kResultNullIfNull,
                      "ltrim_utf8_utf8", NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index ed31c55fd5c4c..55c9c5468ad54 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -37,6 +37,7 @@
 #include "gandiva/formatting_utils.h"
 #include "gandiva/hash_utils.h"
 #include "gandiva/in_holder.h"
+#include "gandiva/json_holder.h"
 #include "gandiva/like_holder.h"
 #include "gandiva/precompiled/types.h"
 #include "gandiva/random_generator_holder.h"
@@ -65,6 +66,14 @@ static char mask_array[256] = {
     'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', (char)123, (char)124, (char)125,
     (char)126, (char)127};
 
+// Stub behind get_json_object(utf8, utf8). `ptr` is reinterpreted as a JsonHolder*;
+// the holder receives copies of both inputs and writes the result length to *out_len.
+const uint8_t* gdv_fn_get_json_object_utf8_utf8(int64_t ptr, const char* data, int data_len,
+                                                const char* pattern, int pattern_len, int32_t* out_len) {
+  gandiva::JsonHolder* holder = reinterpret_cast<gandiva::JsonHolder*>(ptr);
+  return (*holder)(std::string(data, data_len), std::string(pattern, pattern_len), out_len);
+}
+
 bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
                            const char* pattern, int pattern_len) {
   gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
@@ -1284,6 +1293,18 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
                                   types->i8_ptr_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_dec_to_string));
 
+  // gdv_fn_get_json_object_utf8_utf8
+  args = {types->i64_type(),       // int64_t ptr
+          types->i8_ptr_type(),    // const char* data
+          types->i32_type(),       // int data_len
+          types->i8_ptr_type(),    // const char* pattern
+          types->i32_type(),       // int pattern_len
+          types->i32_ptr_type()};  // int32_t* out_len
+
+  engine->AddGlobalMappingForFunc("gdv_fn_get_json_object_utf8_utf8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_get_json_object_utf8_utf8));
+
   // gdv_fn_like_utf8_utf8
   args = {types->i64_type(),     // int64_t ptr
           types->i8_ptr_type(),  // const
char* data
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index e652e5c5e97f2..e6fdd6db6baea 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -68,6 +68,9 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
 
 bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
                             const char* pattern, int pattern_len);
+
+const uint8_t* gdv_fn_get_json_object_utf8_utf8(int64_t ptr, const char* data, int data_len,
+                                                const char* pattern, int pattern_len, int32_t* out_len);
 
 int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
                                        int data_len, bool in1_validity,
diff --git a/cpp/src/gandiva/json_holder.cc b/cpp/src/gandiva/json_holder.cc
new file mode 100644
index 0000000000000..5d640aab838ea
--- /dev/null
+++ b/cpp/src/gandiva/json_holder.cc
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/json_holder.h"
+
+#include <memory>
+#include <string>
+
+#include "gandiva/node.h"
+#include "gandiva/regex_util.h"
+
+namespace gandiva {
+
+Status JsonHolder::Make(const FunctionNode& node, std::shared_ptr<JsonHolder>* holder) {
+  // The holder keeps no per-expression state, so `node` is not inspected.
+  return Make(holder);
+}
+
+Status JsonHolder::Make(std::shared_ptr<JsonHolder>* holder) {
+  *holder = std::make_shared<JsonHolder>();
+  return Status::OK();
+}
+
+// Looks up one top-level field of the JSON object in `json_str`.
+//
+// `json_path` must have the form "$.<field>"; nested paths are not supported.
+// On success, returns the field's bytes and stores their length in `*out_len`.
+// If the path or document is malformed, or the field is missing, null, or not
+// parsed into a dictionary array, returns a non-null pointer with `*out_len` 0.
+//
+// The returned bytes stay valid until the next call on the same thread.
+const uint8_t* JsonHolder::operator()(const std::string& json_str,
+                                      const std::string& json_path, int32_t* out_len) {
+  // Non-null placeholder returned together with *out_len == 0 on every failure.
+  static const uint8_t kEmpty[1] = {0};
+  // Owns the buffer behind the most recently returned pointer; without it the
+  // parsed arrays are destroyed on return and the pointer dangles.
+  static thread_local std::shared_ptr<arrow::Array> last_value_owner;
+
+  *out_len = 0;
+  if (json_path.size() <= 2 || json_path.compare(0, 2, "$.") != 0) {
+    return kEmpty;
+  }
+  // json_path example: "$.col_14" selects the field "col_14".
+  const std::string col_name = json_path.substr(2);
+
+  std::unique_ptr<arrow::json::BlockParser> parser;
+  if (!arrow::json::BlockParser::Make(parse_options_, &parser).ok()) {
+    return kEmpty;
+  }
+  if (!parser->Parse(std::make_shared<arrow::Buffer>(json_str)).ok()) {
+    return kEmpty;
+  }
+  std::shared_ptr<arrow::Array> parsed;
+  if (!parser->Finish(&parsed).ok()) {
+    return kEmpty;
+  }
+
+  auto struct_parsed = std::dynamic_pointer_cast<arrow::StructArray>(parsed);
+  if (struct_parsed == nullptr) {
+    return kEmpty;
+  }
+  // A missing field yields a null pointer here, and so does any value the
+  // parser did not emit as a dictionary array; both fail the cast.
+  auto dict_parsed = std::dynamic_pointer_cast<arrow::DictionaryArray>(
+      struct_parsed->GetFieldByName(col_name));
+  if (dict_parsed == nullptr || dict_parsed->length() == 0 || dict_parsed->IsNull(0)) {
+    return kEmpty;
+  }
+  auto utf8_array =
+      std::dynamic_pointer_cast<arrow::BinaryArray>(dict_parsed->dictionary());
+  if (utf8_array == nullptr || utf8_array->length() == 0) {
+    return kEmpty;
+  }
+
+  last_value_owner = utf8_array;
+  return utf8_array->GetValue(0, out_len);
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/json_holder.h b/cpp/src/gandiva/json_holder.h
new file mode 100644
index 0000000000000..4cf289465db89
--- /dev/null
+++ b/cpp/src/gandiva/json_holder.h
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "arrow/json/api.h"
+#include "arrow/json/parser.h"
+#include "arrow/status.h"
+#include "gandiva/function_holder.h"
+#include "gandiva/node.h"
+#include "gandiva/visibility.h"
+
+namespace gandiva {
+
+/// Function holder for SQL 'get_json_object'; invoked as holder(json, path, &len).
+class GANDIVA_EXPORT JsonHolder : public FunctionHolder {
+ public:
+  JsonHolder() = default;
+  ~JsonHolder() override = default;
+  /// Factories: each stores a new JsonHolder in *holder; `node` is not inspected.
+  static Status Make(const FunctionNode& node, std::shared_ptr<JsonHolder>* holder);
+  static Status Make(std::shared_ptr<JsonHolder>* holder);
+  /// Returns the bytes of the top-level field named by `json_path` (e.g. "$.col_14")
+  /// in `json_str`; length goes to *out_len. NOTE(review): confirm buffer lifetime.
+  const uint8_t* operator()(const std::string& json_str, const std::string& json_path, int32_t* out_len);
+  /// Arrow JSON options; operator() passes parse_options_ to BlockParser::Make.
+  arrow::json::ParseOptions parse_options_ = arrow::json::ParseOptions::Defaults();
+  arrow::json::ReadOptions read_options_ = arrow::json::ReadOptions::Defaults();
+};
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/json_holder_test.cc b/cpp/src/gandiva/json_holder_test.cc
new file mode 100644
index 0000000000000..63f4370c33720
--- /dev/null
+++ b/cpp/src/gandiva/json_holder_test.cc
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/json_holder.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+
+#include "gandiva/regex_util.h"
+
+namespace gandiva {
+
+class TestJsonHolder : public ::testing::Test {};
+
+TEST_F(TestJsonHolder, TestJson) {
+  std::shared_ptr<JsonHolder> json_holder;
+  // Fatal assertion: json_holder is dereferenced below, so stop if Make() failed.
+  auto status = JsonHolder::Make(&json_holder);
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  auto& get_json_object = *json_holder;
+
+  int32_t out_len = 0;
+  const uint8_t* data = get_json_object(R"({"hello": 3.5 })", "$.hello", &out_len);
+  ASSERT_NE(data, nullptr);
+  EXPECT_EQ(std::string(reinterpret_cast<const char*>(data), out_len), "3.5");
+}
+
+}  // namespace gandiva