diff --git a/cpp/src/gandiva/json_holder.cc b/cpp/src/gandiva/json_holder.cc index 7862016696566..9e0263786e3d6 100644 --- a/cpp/src/gandiva/json_holder.cc +++ b/cpp/src/gandiva/json_holder.cc @@ -17,7 +17,6 @@ #include "gandiva/json_holder.h" -#include #include #include "gandiva/node.h" @@ -42,7 +41,6 @@ const uint8_t* JsonHolder::operator()(gandiva::ExecutionContext* ctx, const std: (parser->Finish(&parsed)); auto struct_parsed = std::dynamic_pointer_cast(parsed); //json_path example: $.col_14, will extract col_14 here - // needs to gurad failure here if (json_path.length() < 3) { return nullptr; } @@ -58,9 +56,14 @@ const uint8_t* JsonHolder::operator()(gandiva::ExecutionContext* ctx, const std: return nullptr; } auto dict_array = dict_parsed->dictionary(); + // needs to see whether there is a case that has more than one indices. + auto res_index = dict_parsed->GetValueIndex(0); auto utf8_array = std::dynamic_pointer_cast(dict_array); - auto res = utf8_array->GetValue(0, out_len); - + auto res = utf8_array->GetValue(res_index, out_len); + // empty string case. + if (*out_len == 0) { + return reinterpret_cast(""); + } uint8_t* result_buffer = reinterpret_cast(ctx->arena()->Allocate(*out_len)); memcpy(result_buffer, std::string((char*)res, *out_len).data(), *out_len); return result_buffer; diff --git a/cpp/src/gandiva/json_holder_test.cc b/cpp/src/gandiva/json_holder_test.cc index 9767962f27693..60f60b941dc04 100644 --- a/cpp/src/gandiva/json_holder_test.cc +++ b/cpp/src/gandiva/json_holder_test.cc @@ -41,24 +41,42 @@ TEST_F(TestJsonHolder, TestJson) { int32_t out_len; - const uint8_t* data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hello", &out_len); + const uint8_t* data = get_json_object(&execution_context_, R"({"hello": "3.5"})", "$.hello", &out_len); + EXPECT_EQ(std::string((char*)data, out_len), "3.5"); + + // test the case that value is not surrended by double quotes. + data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hello", &out_len); + EXPECT_EQ(out_len, 3); EXPECT_EQ(std::string((char*)data, out_len), "3.5"); // no data contained for given field. data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.hi", &out_len); EXPECT_EQ(data, nullptr); + // empty string. + data = get_json_object(&execution_context_, R"({"hello": ""})", "$.hello", &out_len); + EXPECT_EQ(out_len, 0); + EXPECT_EQ(std::string((char*)data, out_len), ""); + // illegal json string. data = get_json_object(&execution_context_, R"({"hello"-3.5})", "$.hello", &out_len); EXPECT_EQ(data, nullptr); - // illegal field is given. - data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$xx", &out_len); + // field name is incorrectly given. + data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$hello", &out_len); EXPECT_EQ(data, nullptr); - // illegal field is given and a short string field. - data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$", &out_len); + // field name is not given. + data = get_json_object(&execution_context_, R"({"hello": 3.5})", "$.", &out_len); EXPECT_EQ(data, nullptr); + + data = get_json_object(&execution_context_, R"({"name": "fang", "age": 5, "id": "001"})", "$.age", &out_len); + EXPECT_EQ(out_len, 1); + EXPECT_EQ(std::string((char*)data, out_len), "5"); + + data = get_json_object(&execution_context_, R"({"name": "fang", "age": "5", "id": "001"})", "$.id", &out_len); + EXPECT_EQ(out_len, 3); + EXPECT_EQ(std::string((char*)data, out_len), "001"); } } // namespace gandiva