Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issues in multi-key json case and empty value case #54

Merged
merged 1 commit into from
Dec 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions cpp/src/gandiva/json_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include "gandiva/json_holder.h"

#include <iostream>
#include <regex>

#include "gandiva/node.h"
Expand All @@ -42,7 +41,6 @@ const uint8_t* JsonHolder::operator()(gandiva::ExecutionContext* ctx, const std:
(parser->Finish(&parsed));
auto struct_parsed = std::dynamic_pointer_cast<arrow::StructArray>(parsed);
//json_path example: $.col_14, will extract col_14 here
// needs to guard against failure here
if (json_path.length() < 3) {
return nullptr;
}
Expand All @@ -58,9 +56,14 @@ const uint8_t* JsonHolder::operator()(gandiva::ExecutionContext* ctx, const std:
return nullptr;
}
auto dict_array = dict_parsed->dictionary();
// needs to check whether there is a case that has more than one index.
auto res_index = dict_parsed->GetValueIndex(0);
auto utf8_array = std::dynamic_pointer_cast<arrow::BinaryArray>(dict_array);
auto res = utf8_array->GetValue(0, out_len);

auto res = utf8_array->GetValue(res_index, out_len);
// empty string case.
if (*out_len == 0) {
return reinterpret_cast<const uint8_t*>("");
}
uint8_t* result_buffer = reinterpret_cast<uint8_t*>(ctx->arena()->Allocate(*out_len));
memcpy(result_buffer, std::string((char*)res, *out_len).data(), *out_len);
return result_buffer;
Expand Down
28 changes: 23 additions & 5 deletions cpp/src/gandiva/json_holder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,42 @@ TEST_F(TestJsonHolder, TestJson) {

int32_t out_len;

const uint8_t* data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hello", &out_len);
const uint8_t* data = get_json_object(&execution_context_, R"({"hello": "3.5"})", "$.hello", &out_len);
EXPECT_EQ(std::string((char*)data, out_len), "3.5");

// test the case where the value is not surrounded by double quotes.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hello", &out_len);
EXPECT_EQ(out_len, 3);
EXPECT_EQ(std::string((char*)data, out_len), "3.5");

// no data contained for given field.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hi", &out_len);
EXPECT_EQ(data, nullptr);

// empty string.
data = get_json_object(&execution_context_, R"({"hello": ""})", "$.hello", &out_len);
EXPECT_EQ(out_len, 0);
EXPECT_EQ(std::string((char*)data, out_len), "");

// illegal json string.
data = get_json_object(&execution_context_, R"({"hello"-3.5})", "$.hello", &out_len);
EXPECT_EQ(data, nullptr);

// illegal field is given.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$xx", &out_len);
// field name is incorrectly given.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$hello", &out_len);
EXPECT_EQ(data, nullptr);

// illegal field is given and a short string field.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$", &out_len);
// field name is not given.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.", &out_len);
EXPECT_EQ(data, nullptr);

data = get_json_object(&execution_context_, R"({"name": "fang", "age": 5, "id": "001"})", "$.age", &out_len);
EXPECT_EQ(out_len, 1);
EXPECT_EQ(std::string((char*)data, out_len), "5");

data = get_json_object(&execution_context_, R"({"name": "fang", "age": "5", "id": "001"})", "$.id", &out_len);
EXPECT_EQ(out_len, 3);
EXPECT_EQ(std::string((char*)data, out_len), "001");
}

} // namespace gandiva