Skip to content

Commit

Permalink
implement json functions (apache#49)
Browse files Browse the repository at this point in the history
Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
  • Loading branch information
zhouyuan authored and zhztheplayer committed Feb 28, 2022
1 parent 557a866 commit 0dcc45c
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cpp/src/gandiva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ set(SRC_FILES
llvm_generator.cc
llvm_types.cc
like_holder.cc
json_holder.cc
literal_holder.cc
projector.cc
regex_util.cc
Expand Down Expand Up @@ -233,6 +234,7 @@ add_gandiva_test(internals-test
lru_cache_test.cc
to_date_holder_test.cc
simple_arena_test.cc
json_holder_test.cc
like_holder_test.cc
replace_holder_test.cc
decimal_type_util_test.cc
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/gandiva/function_holder_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "arrow/status.h"

#include "gandiva/function_holder.h"
#include "gandiva/json_holder.h"
#include "gandiva/like_holder.h"
#include "gandiva/node.h"
#include "gandiva/random_generator_holder.h"
Expand Down Expand Up @@ -64,6 +65,7 @@ class FunctionHolderRegistry {
// Keys must match the function names used in the function registry, since
// holders are looked up by function name.
static map_type maker_map = {
{"like", LAMBDA_MAKER(LikeHolder)},
{"ilike", LAMBDA_MAKER(LikeHolder)},
{"get_json_object", LAMBDA_MAKER(JsonHolder)},
{"to_date", LAMBDA_MAKER(ToDateHolder)},
{"random", LAMBDA_MAKER(RandomGeneratorHolder)},
{"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/gandiva/function_registry_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
NativeFunction("ilike", {}, DataTypeVector{utf8(), utf8()}, boolean(),
kResultNullIfNull, "gdv_fn_ilike_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("get_json_object", {}, DataTypeVector{utf8(), utf8()}, utf8(),
kResultNullIfNull, "gdv_fn_get_json_object_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),
Expand Down
19 changes: 19 additions & 0 deletions cpp/src/gandiva/gdv_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "gandiva/formatting_utils.h"
#include "gandiva/hash_utils.h"
#include "gandiva/in_holder.h"
#include "gandiva/json_holder.h"
#include "gandiva/like_holder.h"
#include "gandiva/precompiled/types.h"
#include "gandiva/random_generator_holder.h"
Expand Down Expand Up @@ -65,6 +66,12 @@ static char mask_array[256] = {
'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x',
'x', 'x', 'x', (char)123, (char)124, (char)125, (char)126, (char)127};

const uint8_t* gdv_fn_get_json_object_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len, int32_t* out_len) {
gandiva::JsonHolder* holder = reinterpret_cast<gandiva::JsonHolder*>(ptr);
return (*holder)(std::string(data, data_len), std::string(pattern, pattern_len), out_len);
}

bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len) {
gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
Expand Down Expand Up @@ -1284,6 +1291,18 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
types->i8_ptr_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_dec_to_string));

// gdv_fn_get_json_object_utf8_utf8
// Argument types below mirror the C signature of the stub, in order.
args = {types->i64_type(), // int64_t ptr (address of the gandiva::JsonHolder)
types->i8_ptr_type(), // const char* data
types->i32_type(), // int data_len
types->i8_ptr_type(), // const char* pattern
types->i32_type(), // int pattern_len
types->i32_ptr_type()}; // int32_t* out_len (output: length of the returned bytes)

// The stub returns a byte pointer, hence the i8* return type.
engine->AddGlobalMappingForFunc("gdv_fn_get_json_object_utf8_utf8",
types->i8_ptr_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_get_json_object_utf8_utf8));

// gdv_fn_like_utf8_utf8
args = {types->i64_type(), // int64_t ptr
types->i8_ptr_type(), // const char* data
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/gandiva/gdv_function_stubs.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,

bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len);

// Stub for get_json_object(utf8, utf8): `ptr` is the address of a
// gandiva::JsonHolder, `data`/`pattern` are the JSON text and the JSON path, and
// the length of the returned bytes is written through `out_len`.
const uint8_t* gdv_fn_get_json_object_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len, int32_t* out_len);

int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
int data_len, bool in1_validity,
Expand Down
59 changes: 59 additions & 0 deletions cpp/src/gandiva/json_holder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "gandiva/json_holder.h"

#include <iostream>
#include <regex>

#include "gandiva/node.h"
#include "gandiva/regex_util.h"

namespace gandiva {

/// Build a JsonHolder for a function node.
///
/// The node is not inspected; construction is delegated to the node-less
/// overload.
Status JsonHolder::Make(const FunctionNode& /*node*/,
                        std::shared_ptr<JsonHolder>* holder) {
  return JsonHolder::Make(holder);
}

/// Create a default-constructed JsonHolder and store it in `*holder`.
///
/// Always returns Status::OK().
Status JsonHolder::Make(std::shared_ptr<JsonHolder>* holder) {
  auto created = std::make_shared<JsonHolder>();
  *holder = created;
  return Status::OK();
}

/// \brief Extract a top-level scalar field from a JSON document.
///
/// \param json_str  the JSON text; only its first row (object) is consulted
/// \param json_path path of the form "$.<field>"; the first two characters are
///                  skipped and the remainder is used as the field name
/// \param out_len   receives the length in bytes of the returned value
/// \return pointer to the raw text of the field value. When the document cannot
///         be parsed, the path is too short, the field is absent, null or not a
///         scalar, an empty value is returned (`*out_len == 0`).
///
/// The returned bytes are kept alive by a thread-local reference and stay valid
/// only until the next call of this operator on the same thread; callers must
/// copy the value before calling again.
const uint8_t* JsonHolder::operator()(const std::string& json_str,
                                      const std::string& json_path,
                                      int32_t* out_len) {
  // Fallback result for every "nothing to extract" case, so callers never see a
  // dangling or null pointer.
  static const uint8_t kEmpty[1] = {0};
  *out_len = 0;

  // json_path example: $.col_14, will extract col_14 here. substr(2) would throw
  // for shorter paths, and an empty field name cannot match anything.
  if (json_path.size() <= 2) {
    return kEmpty;
  }
  const std::string col_name = json_path.substr(2);

  // Any parser failure (e.g. malformed JSON) yields the empty result instead of
  // continuing with a null parser or array.
  std::unique_ptr<arrow::json::BlockParser> parser;
  if (!arrow::json::BlockParser::Make(parse_options_, &parser).ok() ||
      parser == nullptr) {
    return kEmpty;
  }
  if (!parser->Parse(std::make_shared<arrow::Buffer>(json_str)).ok()) {
    return kEmpty;
  }
  std::shared_ptr<arrow::Array> parsed;
  if (!parser->Finish(&parsed).ok() || parsed == nullptr) {
    return kEmpty;
  }

  auto struct_parsed = std::dynamic_pointer_cast<arrow::StructArray>(parsed);
  if (struct_parsed == nullptr || struct_parsed->length() == 0) {
    return kEmpty;
  }

  // A missing field gives a null child; a field that is not dictionary-encoded
  // fails the cast. Both are treated as "no value".
  auto dict_parsed = std::dynamic_pointer_cast<arrow::DictionaryArray>(
      struct_parsed->GetFieldByName(col_name));
  if (dict_parsed == nullptr || dict_parsed->IsNull(0)) {
    return kEmpty;
  }

  auto values =
      std::dynamic_pointer_cast<arrow::BinaryArray>(dict_parsed->dictionary());
  if (values == nullptr) {
    return kEmpty;
  }

  // Look up the dictionary entry that row 0 actually refers to, rather than
  // assuming it is entry 0 of the dictionary.
  const int64_t value_index = dict_parsed->GetValueIndex(0);
  if (value_index < 0 || value_index >= values->length()) {
    return kEmpty;
  }

  // The value bytes live in buffers owned by `values`; keep a reference beyond
  // this call so the returned pointer does not dangle once the locals die.
  thread_local std::shared_ptr<arrow::BinaryArray> last_values;
  last_values = values;
  return last_values->GetValue(value_index, out_len);
}

} // namespace gandiva
48 changes: 48 additions & 0 deletions cpp/src/gandiva/json_holder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <memory>
#include <string>

#include "arrow/json/api.h"
#include "arrow/json/parser.h"
#include "arrow/status.h"
#include "gandiva/function_holder.h"
#include "gandiva/node.h"
#include "gandiva/visibility.h"

namespace gandiva {

/// Function holder backing the SQL 'get_json_object' function.
class GANDIVA_EXPORT JsonHolder : public FunctionHolder {
 public:
  JsonHolder() = default;
  ~JsonHolder() override = default;

  /// Create a holder for the given function node and store it in `*holder`.
  static Status Make(const FunctionNode& node, std::shared_ptr<JsonHolder>* holder);

  /// Create a holder without a function node and store it in `*holder`.
  static Status Make(std::shared_ptr<JsonHolder>* holder);

  /// Look up the value addressed by `json_path` inside `json_str`.
  ///
  /// Returns a pointer to the value's bytes and writes their length to
  /// `*out_len`.
  /// NOTE(review): the lifetime of the returned bytes is not expressed by this
  /// signature -- confirm against the implementation before retaining the pointer.
  const uint8_t* operator()(const std::string& json_str, const std::string& json_path,
                            int32_t* out_len);

  /// Options passed to the Arrow JSON block parser.
  arrow::json::ParseOptions parse_options_ = arrow::json::ParseOptions::Defaults();

  /// Arrow JSON read options, initialized to the library defaults.
  arrow::json::ReadOptions read_options_ = arrow::json::ReadOptions::Defaults();
};

} // namespace gandiva
45 changes: 45 additions & 0 deletions cpp/src/gandiva/json_holder_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "gandiva/json_holder.h"

#include <gtest/gtest.h>

#include <memory>
#include <vector>

#include "gandiva/regex_util.h"

namespace gandiva {

class TestJsonHolder : public ::testing::Test {};

TEST_F(TestJsonHolder, TestJson) {
std::shared_ptr<JsonHolder> json_holder;

auto status = JsonHolder::Make(&json_holder);
EXPECT_EQ(status.ok(), true) << status.message();

auto& get_json_object = *json_holder;

int32_t out_len;
const uint8_t* data = get_json_object(R"({"hello": 3.5 })", "$.hello", &out_len);
EXPECT_EQ(std::string((char*)data, out_len), "3.5");

}

} // namespace gandiva

0 comments on commit 0dcc45c

Please sign in to comment.