Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support json_contains_path #8502

Merged
merged 17 commits into from
Dec 22, 2023
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ const std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
//{tipb::ScalarFuncSig::JsonArrayInsertSig, "cast"},
//{tipb::ScalarFuncSig::JsonMergePatchSig, "cast"},
//{tipb::ScalarFuncSig::JsonMergePreserveSig, "cast"},
//{tipb::ScalarFuncSig::JsonContainsPathSig, "cast"},
{tipb::ScalarFuncSig::JsonContainsPathSig, "json_contains_path"},
//{tipb::ScalarFuncSig::JsonPrettySig, "cast"},
//{tipb::ScalarFuncSig::JsonQuoteSig, "cast"},
//{tipb::ScalarFuncSig::JsonSearchSig, "cast"},
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Functions/FunctionsJson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,6 @@ void registerFunctionsJson(FunctionFactory & factory)
factory.registerFunction<FunctionCastTimeAsJson>();
factory.registerFunction<FunctionCastDurationAsJson>();
factory.registerFunction<FunctionJsonDepth>();
factory.registerFunction<FunctionJsonContainsPath>();
}
} // namespace DB
312 changes: 312 additions & 0 deletions dbms/src/Functions/FunctionsJson.h
Original file line number Diff line number Diff line change
Expand Up @@ -1508,4 +1508,316 @@ class FunctionJsonDepth : public IFunction
}
}
};

/// SQL function `json_contains_path(json, one_or_all, path[, path] ...)`.
///
/// Takes at least three arguments, each of which must be a String,
/// a Nullable(String) or an only-NULL column:
///   - argument 0: the JSON value (read below as a one-byte type tag followed by the binary payload);
///   - argument 1: the search mode, which has to satisfy either
///     `JsonBinary::isJSONContainsPathOne` or `JsonBinary::isJSONContainsPathAll`;
///   - arguments 2..N: JSON path expressions.
///
/// The result is UInt8 (wrapped in Nullable when any argument is nullable):
///   - NULL if the JSON value, the mode, or any path that gets inspected is NULL;
///   - in "one" mode, 1 as soon as a path yields a non-empty extraction, otherwise 0;
///   - in "all" mode, 0 as soon as a path yields an empty extraction, otherwise 1.
class FunctionJsonContainsPath : public IFunction
{
public:
    static constexpr auto name = "json_contains_path";
    static FunctionPtr create(const Context &) { return std::make_shared<FunctionJsonContainsPath>(); }

    String getName() const override { return name; }

    bool isVariadic() const override { return true; }

    /// Variadic function: the argument count is validated in getReturnTypeImpl instead.
    size_t getNumberOfArguments() const override { return 0; }

    /// NULL handling is done by hand in executeImpl/doExecute.
    bool useDefaultImplementationForNulls() const override { return false; }
    bool useDefaultImplementationForConstants() const override { return true; }

    /// Validates the argument list and derives the result type.
    ///
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT when fewer than three arguments are given or
    /// when an argument is neither only-NULL nor a (nullable) String.
    ///
    /// Returns Nullable(Nothing) when any of the first three arguments is only-NULL,
    /// Nullable(UInt8) when any argument is nullable or only-NULL, and UInt8 otherwise.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if unlikely (arguments.size() < 3)
        {
            throw Exception(
                fmt::format("Illegal arguments count {} of function {}", arguments.size(), getName()),
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        }
        for (const auto & arg : arguments)
        {
            if unlikely (!arg->onlyNull() && !removeNullable(arg)->isString())
            {
                throw Exception(
                    fmt::format("Illegal type {} of argument of function {}", arg->getName(), getName()),
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            }
        }

        if (arguments[0]->onlyNull() || arguments[1]->onlyNull() || arguments[2]->onlyNull())
            return makeNullable(std::make_shared<DataTypeNothing>());

        auto return_type = std::make_shared<DataTypeUInt8>();
        for (const auto & arg : arguments)
        {
            if (arg->onlyNull() || arg->isNullable())
                return makeNullable(return_type);
        }
        return return_type;
    }

    /// Evaluates the function for every row of `block` and stores the result column at `result`.
    ///
    /// Throws ILLEGAL_COLUMN (from doExecute) when the mode argument is neither "one" nor "all"
    /// or when a path expression cannot be parsed.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
    {
        const auto & json_col = block.getByPosition(arguments[0]).column;
        const auto & type_col = block.getByPosition(arguments[1]).column;

        // Mirrors the Nullable(Nothing) case of getReturnTypeImpl: the whole result is a constant NULL.
        if (json_col->onlyNull() || type_col->onlyNull() || block.getByPosition(arguments[2]).column->onlyNull())
        {
            block.getByPosition(result).column
                = block.getByPosition(result).type->createColumnConst(block.rows(), Null());
            return;
        }

        // The string sources are built from `nested_block`, while the null maps are taken from the
        // original (possibly nullable) columns of `block`.
        auto nested_block = createBlockWithNestedColumns(block, arguments);
        auto json_source = createDynamicStringSource(*nested_block.getByPosition(arguments[0]).column);
        auto type_source = createDynamicStringSource(*nested_block.getByPosition(arguments[1]).column);

        size_t rows = block.rows();
        // Result values start at 1; doExecute lowers them to 0 where required.
        auto col_to = ColumnUInt8::create(rows, 1);
        auto & data_to = col_to->getData();
        auto col_null_map = ColumnUInt8::create(rows, 0);
        auto & vec_null_map = col_null_map->getData();

        // One entry per path argument. An only-NULL path is represented by a null source;
        // a non-nullable path is represented by a null entry in `path_null_maps`.
        StringSources path_sources;
        path_sources.reserve(arguments.size() - 2);
        bool paths_nullable = false;
        std::vector<const NullMap *> path_null_maps;
        path_null_maps.reserve(arguments.size() - 2);
        for (size_t i = 2; i < arguments.size(); ++i)
        {
            const auto & path_col = block.getByPosition(arguments[i]).column;
            if (path_col->onlyNull())
            {
                path_sources.push_back(nullptr);
                paths_nullable = true;
                path_null_maps.push_back(nullptr);
            }
            else if (path_col->isColumnNullable())
            {
                path_sources.push_back(createDynamicStringSource(*nested_block.getByPosition(arguments[i]).column));
                paths_nullable = true;
                const auto & path_column_nullable = static_cast<const ColumnNullable &>(*path_col);
                path_null_maps.push_back(&path_column_nullable.getNullMapData());
            }
            else
            {
                path_sources.push_back(createDynamicStringSource(*nested_block.getByPosition(arguments[i]).column));
                path_null_maps.push_back(nullptr);
            }
        }

        // Dispatch on the three nullability flags so that doExecute can drop unneeded NULL checks
        // at compile time. `{}` passes an empty NullMap that the callee never reads for that flag.
        if (json_col->isColumnNullable())
        {
            const auto & json_column_nullable = static_cast<const ColumnNullable &>(*json_col);
            if (type_col->isColumnNullable())
            {
                const auto & type_column_nullable = static_cast<const ColumnNullable &>(*type_col);
                if (paths_nullable)
                    doExecute<true, true, true>(
                        json_source,
                        json_column_nullable.getNullMapData(),
                        type_source,
                        type_column_nullable.getNullMapData(),
                        path_sources,
                        path_null_maps,
                        rows,
                        data_to,
                        vec_null_map);
                else
                    doExecute<true, true, false>(
                        json_source,
                        json_column_nullable.getNullMapData(),
                        type_source,
                        type_column_nullable.getNullMapData(),
                        path_sources,
                        path_null_maps,
                        rows,
                        data_to,
                        vec_null_map);
            }
            else
            {
                if (paths_nullable)
                    doExecute<true, false, true>(
                        json_source,
                        json_column_nullable.getNullMapData(),
                        type_source,
                        {},
                        path_sources,
                        path_null_maps,
                        rows,
                        data_to,
                        vec_null_map);
                else
                    doExecute<true, false, false>(
                        json_source,
                        json_column_nullable.getNullMapData(),
                        type_source,
                        {},
                        path_sources,
                        path_null_maps,
                        rows,
                        data_to,
                        vec_null_map);
            }
        }
        else
        {
            if (type_col->isColumnNullable())
            {
                const auto & type_column_nullable = static_cast<const ColumnNullable &>(*type_col);
                if (paths_nullable)
                    doExecute<false, true, true>(
                        json_source,
                        {},
                        type_source,
                        type_column_nullable.getNullMapData(),
                        path_sources,
                        path_null_maps,
                        rows,
                        data_to,
                        vec_null_map);
                else
                    doExecute<false, true, false>(
                        json_source,
                        {},
                        type_source,
                        type_column_nullable.getNullMapData(),
                        path_sources,
                        path_null_maps,
                        rows,
                        data_to,
                        vec_null_map);
            }
            else
            {
                if (paths_nullable)
                    doExecute<false, false, true>(
                        json_source,
                        {},
                        type_source,
                        {},
                        path_sources,
                        path_null_maps,
                        rows,
                        data_to,
                        vec_null_map);
                else
                    doExecute<false, false, false>(
                        json_source,
                        {},
                        type_source,
                        {},
                        path_sources,
                        path_null_maps,
                        rows,
                        data_to,
                        vec_null_map);
            }
        }

        auto & result_col = block.getByPosition(result);
        if (result_col.type->onlyNull() || result_col.type->isNullable())
            result_col.column = ColumnNullable::create(std::move(col_to), std::move(col_null_map));
        else
            result_col.column = std::move(col_to);
    }

private:
    /// Row-by-row evaluation.
    ///
    /// Template flags select which NULL checks are compiled in:
    ///   - is_json_nullable: consult `null_map_json`;
    ///   - is_type_nullable: consult `null_map_type`;
    ///   - paths_nullable:   consult `path_sources[i]` (null means only-NULL path) and `path_null_maps[i]`.
    ///
    /// `data_to` must be pre-filled with 1 and `null_map_to` with 0, both of length `rows`.
    /// All sources are advanced exactly once per row, including rows that end up NULL.
    ///
    /// Throws ILLEGAL_COLUMN when the mode is neither "one" nor "all", or when a path fails to parse.
    template <bool is_json_nullable, bool is_type_nullable, bool paths_nullable>
    void doExecute(
        const std::unique_ptr<IStringSource> & json_source,
        const NullMap & null_map_json,
        const std::unique_ptr<IStringSource> & type_source,
        const NullMap & null_map_type,
        const StringSources & path_sources,
        const std::vector<const NullMap *> & path_null_maps,
        size_t rows,
        ColumnUInt8::Container & data_to,
        NullMap & null_map_to) const
    {
        // Moves every source to the next row. A local lambda is used instead of a macro so that
        // nothing leaks out of this header into the including translation units.
        const auto advance_sources = [&] {
            for (const auto & path_source : path_sources)
            {
                if (path_source)
                    path_source->next();
            }
            json_source->next();
            type_source->next();
        };

        for (size_t row = 0; row < rows; ++row)
        {
            if constexpr (is_json_nullable)
            {
                if (null_map_json[row])
                {
                    advance_sources();
                    null_map_to[row] = 1;
                    continue;
                }
            }
            if constexpr (is_type_nullable)
            {
                if (null_map_type[row])
                {
                    advance_sources();
                    null_map_to[row] = 1;
                    continue;
                }
            }

            // The first byte is handed to JsonBinary as the type tag, the remainder as the payload.
            const auto & json_val = json_source->getWhole();
            JsonBinary json_binary{json_val.data[0], StringRef{&json_val.data[1], json_val.size - 1}};

            const auto & type_val = type_source->getWhole();
            std::string_view type{reinterpret_cast<const char *>(type_val.data), type_val.size};
            // Evaluated once per row; after the validation below, `!is_one_mode` means "all" mode.
            const bool is_one_mode = JsonBinary::isJSONContainsPathOne(type);
            if unlikely (!is_one_mode && !JsonBinary::isJSONContainsPathAll(type))
                throw Exception(
                    fmt::format("The second argument can only be either 'one' or 'all' of function {}.", getName()),
                    ErrorCodes::ILLEGAL_COLUMN);

            auto & res = data_to[row]; // default 1.
            for (size_t i = 0; i < path_sources.size(); ++i)
            {
                if constexpr (paths_nullable)
                {
                    // A NULL path makes the row NULL and stops the scan of the remaining paths.
                    if (!path_sources[i] || (path_null_maps[i] && (*path_null_maps[i])[row]))
                    {
                        null_map_to[row] = 1;
                        break;
                    }
                }

                assert(path_sources[i]);
                const auto & path_val = path_sources[i]->getWhole();
                auto path_expr = JsonPathExpr::parseJsonPathExpr(StringRef{path_val.data, path_val.size});
                /// If path_expr failed to parse, throw exception
                if unlikely (!path_expr)
                    throw Exception(
                        fmt::format("Illegal json path expression of function {}", getName()),
                        ErrorCodes::ILLEGAL_COLUMN);

                auto path_expr_container = std::make_unique<JsonPathExprRefContainer>(path_expr);
                std::vector<JsonPathExprRefContainerPtr> path_expr_container_vec;
                path_expr_container_vec.push_back(std::move(path_expr_container));
                const bool exists = !json_binary.extract(path_expr_container_vec).empty();

                if (exists)
                {
                    // "one": a single hit decides the row. "all": keep checking the remaining paths.
                    if (is_one_mode)
                    {
                        res = 1;
                        break;
                    }
                }
                else
                {
                    // "one": remember the miss but keep looking. "all": a single miss decides the row.
                    res = 0;
                    if (!is_one_mode)
                        break;
                }
            }

            advance_sources();
        }
    }
};
} // namespace DB
Loading