Skip to content

Commit

Permalink
Fix issues in multi-key json case and empty value case (apache#54)
Browse files Browse the repository at this point in the history
  • Loading branch information
PHILO-HE authored and zhztheplayer committed Mar 15, 2022
1 parent 7324c28 commit 66c3ce4
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 9 deletions.
11 changes: 7 additions & 4 deletions cpp/src/gandiva/json_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include "gandiva/json_holder.h"

#include <iostream>
#include <regex>

#include "gandiva/node.h"
Expand All @@ -42,7 +41,6 @@ const uint8_t* JsonHolder::operator()(gandiva::ExecutionContext* ctx, const std:
(parser->Finish(&parsed));
auto struct_parsed = std::dynamic_pointer_cast<arrow::StructArray>(parsed);
// json_path example: $.col_14; col_14 will be extracted here.
// needs to guard against failure here
if (json_path.length() < 3) {
return nullptr;
}
Expand All @@ -58,9 +56,14 @@ const uint8_t* JsonHolder::operator()(gandiva::ExecutionContext* ctx, const std:
return nullptr;
}
auto dict_array = dict_parsed->dictionary();
// needs to check whether there is a case that has more than one index.
auto res_index = dict_parsed->GetValueIndex(0);
auto utf8_array = std::dynamic_pointer_cast<arrow::BinaryArray>(dict_array);
auto res = utf8_array->GetValue(0, out_len);

auto res = utf8_array->GetValue(res_index, out_len);
// empty string case.
if (*out_len == 0) {
return reinterpret_cast<const uint8_t*>("");
}
uint8_t* result_buffer = reinterpret_cast<uint8_t*>(ctx->arena()->Allocate(*out_len));
memcpy(result_buffer, std::string((char*)res, *out_len).data(), *out_len);
return result_buffer;
Expand Down
28 changes: 23 additions & 5 deletions cpp/src/gandiva/json_holder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,42 @@ TEST_F(TestJsonHolder, TestJson) {

int32_t out_len;

const uint8_t* data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hello", &out_len);
const uint8_t* data = get_json_object(&execution_context_, R"({"hello": "3.5"})", "$.hello", &out_len);
EXPECT_EQ(std::string((char*)data, out_len), "3.5");

// test the case where the value is not surrounded by double quotes.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hello", &out_len);
EXPECT_EQ(out_len, 3);
EXPECT_EQ(std::string((char*)data, out_len), "3.5");

// no data contained for given field.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hi", &out_len);
EXPECT_EQ(data, nullptr);

// empty string.
data = get_json_object(&execution_context_, R"({"hello": ""})", "$.hello", &out_len);
EXPECT_EQ(out_len, 0);
EXPECT_EQ(std::string((char*)data, out_len), "");

// illegal json string.
data = get_json_object(&execution_context_, R"({"hello"-3.5})", "$.hello", &out_len);
EXPECT_EQ(data, nullptr);

// illegal field is given.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$xx", &out_len);
// field name is incorrectly given.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$hello", &out_len);
EXPECT_EQ(data, nullptr);

// illegal field is given and a short string field.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$", &out_len);
// field name is not given.
data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.", &out_len);
EXPECT_EQ(data, nullptr);

data = get_json_object(&execution_context_, R"({"name": "fang", "age": 5, "id": "001"})", "$.age", &out_len);
EXPECT_EQ(out_len, 1);
EXPECT_EQ(std::string((char*)data, out_len), "5");

data = get_json_object(&execution_context_, R"({"name": "fang", "age": "5", "id": "001"})", "$.id", &out_len);
EXPECT_EQ(out_len, 3);
EXPECT_EQ(std::string((char*)data, out_len), "001");
}

} // namespace gandiva

0 comments on commit 66c3ce4

Please sign in to comment.