diff --git a/be/src/vec/columns/column_varbinary.h b/be/src/vec/columns/column_varbinary.h index 20417ba2d6643b..6411eb26ec0850 100644 --- a/be/src/vec/columns/column_varbinary.h +++ b/be/src/vec/columns/column_varbinary.h @@ -70,6 +70,8 @@ class ColumnVarbinary final : public COWHelper { StringRef get_data_at(size_t n) const override { return _data[n].to_string_ref(); } + char* alloc(size_t length) { return _arena.alloc(length); } + void insert(const Field& x) override { auto value = vectorized::get(x); insert_data(value.data(), value.size()); @@ -101,6 +103,12 @@ class ColumnVarbinary final : public COWHelper { void insert_default() override { _data.push_back(doris::StringView()); } + int compare_at(size_t n, size_t m, const IColumn& rhs_, + int /*nan_direction_hint*/) const override { + const ColumnVarbinary& rhs = assert_cast(rhs_); + return this->_data[n].compare(rhs.get_data()[m]); + } + void pop_back(size_t n) override { resize(size() - n); } StringRef serialize_value_into_arena(size_t n, Arena& arena, diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h index f361f1e078b23e..8c0121c916dffb 100644 --- a/be/src/vec/common/pod_array.h +++ b/be/src/vec/common/pod_array.h @@ -548,6 +548,11 @@ class PODArray : public PODArrayBasec_end -= this->byte_size(1); } + void pop_back(size_t n) { + DCHECK_GE(this->size(), n); + this->c_end -= this->byte_size(n); + } + /// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated. template void insert_prepare(It1 from_begin, It2 from_end, TAllocatorParams&&... 
allocator_params) { diff --git a/be/src/vec/common/string_view.h b/be/src/vec/common/string_view.h index 88fcc14c6d843e..5cd560aad4a871 100644 --- a/be/src/vec/common/string_view.h +++ b/be/src/vec/common/string_view.h @@ -99,6 +99,8 @@ class StringView { uint32_t size() const { return size_; } bool empty() const { return size() == 0; } + void set_size(uint32_t size) { size_ = size; } + bool operator==(const StringView& other) const; friend std::ostream& operator<<(std::ostream& os, const StringView& stringView) { os.write(stringView.data(), stringView.size()); diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index f52e90a5bdfb80..7a6c71eadbe946 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -972,6 +972,7 @@ struct UnHexImpl { static constexpr auto name = Name::name; using ReturnType = DataTypeString; using ColumnType = ColumnString; + static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { @@ -1081,6 +1082,7 @@ struct ToBase64Impl { static constexpr auto name = "to_base64"; using ReturnType = DataTypeString; using ColumnType = ColumnString; + static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { diff --git a/be/src/vec/functions/function_totype.h b/be/src/vec/functions/function_totype.h index 714d2fd166117e..a0bb623a355185 100644 --- a/be/src/vec/functions/function_totype.h +++ b/be/src/vec/functions/function_totype.h @@ -22,6 +22,7 @@ #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" +#include "vec/columns/column_varbinary.h" #include 
"vec/columns/column_vector.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_bitmap.h" @@ -123,6 +124,13 @@ class FunctionUnaryToType : public IFunction { block.replace_by_position(result, std::move(col_res)); return Status::OK(); } + } else if constexpr (Impl::PrimitiveTypeImpl == PrimitiveType::TYPE_VARBINARY) { + if (const auto* col = check_and_get_column(column.get())) { + auto col_res = Impl::ReturnColumnType::create(); + RETURN_IF_ERROR(Impl::vector(col->get_data(), col_res->get_data())); + block.replace_by_position(result, std::move(col_res)); + return Status::OK(); + } } return Status::RuntimeError("Illegal column {} of argument of function {}", block.get_by_position(arguments[0]).column->get_name(), @@ -465,11 +473,19 @@ class FunctionStringOperateToNullType : public IFunction { auto& col_ptr = block.get_by_position(arguments[0]).column; - auto res = Impl::ColumnType::create(); if (const auto* col = check_and_get_column(col_ptr.get())) { auto col_res = Impl::ColumnType::create(); - RETURN_IF_ERROR(Impl::vector(col->get_chars(), col->get_offsets(), col_res->get_chars(), - col_res->get_offsets(), null_map->get_data())); + if constexpr (std::is_same_v) { + RETURN_IF_ERROR(Impl::vector(col->get_chars(), col->get_offsets(), + col_res->get_chars(), col_res->get_offsets(), + null_map->get_data())); + } else if (std::is_same_v) { + RETURN_IF_ERROR(Impl::vector(col->get_chars(), col->get_offsets(), col_res.get(), + null_map->get_data())); + } else { + return Status::RuntimeError("Illegal returntype {} of argument of function {}", + col_res->get_name(), get_name()); + } block.replace_by_position( result, ColumnNullable::create(std::move(col_res), std::move(null_map))); } else { @@ -506,31 +522,48 @@ class FunctionStringEncode : public IFunction { if constexpr (is_allow_null) { auto null_map = ColumnUInt8::create(input_rows_count, 0); auto& null_map_data = null_map->get_data(); - if (const auto* col = assert_cast(col_ptr.get())) { - auto 
col_res = Impl::ColumnType::create(); - RETURN_IF_ERROR(Impl::vector(col->get_chars(), col->get_offsets(), - col_res->get_chars(), col_res->get_offsets(), - &null_map_data)); - block.get_by_position(result).column = - ColumnNullable::create(std::move(col_res), std::move(null_map)); - } else { - return Status::RuntimeError("Illegal column {} of argument of function {}", - block.get_by_position(arguments[0]).column->get_name(), - get_name()); + if constexpr (Impl::PrimitiveTypeImpl == PrimitiveType::TYPE_STRING) { + if (const auto* col = assert_cast(col_ptr.get())) { + auto col_res = Impl::ColumnType::create(); + RETURN_IF_ERROR(Impl::vector(col->get_chars(), col->get_offsets(), + col_res->get_chars(), col_res->get_offsets(), + &null_map_data)); + block.get_by_position(result).column = + ColumnNullable::create(std::move(col_res), std::move(null_map)); + return Status::OK(); + } + } else if (Impl::PrimitiveTypeImpl == PrimitiveType::TYPE_VARBINARY) { + if (const auto* col = assert_cast(col_ptr.get())) { + auto col_res = Impl::ColumnType::create(); + RETURN_IF_ERROR(Impl::vector(col->get_data(), col_res->get_chars(), + col_res->get_offsets(), &null_map_data)); + block.get_by_position(result).column = + ColumnNullable::create(std::move(col_res), std::move(null_map)); + return Status::OK(); + } } } else { - if (const auto* col = assert_cast(col_ptr.get())) { - auto col_res = Impl::ColumnType::create(); - RETURN_IF_ERROR(Impl::vector(col->get_chars(), col->get_offsets(), - col_res->get_chars(), col_res->get_offsets())); - block.replace_by_position(result, std::move(col_res)); - } else { - return Status::RuntimeError("Illegal column {} of argument of function {}", - block.get_by_position(arguments[0]).column->get_name(), - get_name()); + if constexpr (Impl::PrimitiveTypeImpl == PrimitiveType::TYPE_STRING) { + if (const auto* col = assert_cast(col_ptr.get())) { + auto col_res = Impl::ColumnType::create(); + RETURN_IF_ERROR(Impl::vector(col->get_chars(), col->get_offsets(), 
+ col_res->get_chars(), col_res->get_offsets())); + block.replace_by_position(result, std::move(col_res)); + return Status::OK(); + } + } else if (Impl::PrimitiveTypeImpl == PrimitiveType::TYPE_VARBINARY) { + if (const auto* col = assert_cast(col_ptr.get())) { + auto col_res = Impl::ColumnType::create(); + RETURN_IF_ERROR(Impl::vector(col->get_data(), col_res->get_chars(), + col_res->get_offsets())); + block.replace_by_position(result, std::move(col_res)); + return Status::OK(); + } } } - return Status::OK(); + return Status::RuntimeError("Illegal column {} of argument of function {}", + block.get_by_position(arguments[0]).column->get_name(), + get_name()); } }; } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_varbinary.cpp b/be/src/vec/functions/function_varbinary.cpp index a64cd99392ce62..ec592c236bc14f 100644 --- a/be/src/vec/functions/function_varbinary.cpp +++ b/be/src/vec/functions/function_varbinary.cpp @@ -15,12 +15,15 @@ // specific language governing permissions and limitations // under the License. 
+#include "vec/functions/function_varbinary.h" + #include #include #include #include "common/status.h" +#include "util/url_coding.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" @@ -32,10 +35,12 @@ #include "vec/data_types/data_type_varbinary.h" #include "vec/functions/function.h" #include "vec/functions/function_helpers.h" +#include "vec/functions/function_totype.h" #include "vec/functions/simple_function_factory.h" #include "vec/functions/string_hex_util.h" namespace doris::vectorized { +#include "common/compile_check_begin.h" class FunctionToBinary : public IFunction { public: @@ -59,30 +64,24 @@ class FunctionToBinary : public IFunction { auto col_res = ColumnVarbinary::create(); const auto& data = col->get_chars(); const auto& offsets = col->get_offsets(); + col_res->get_data().assign(input_rows_count, StringView()); - std::array stack_buf; - std::vector heap_buf; for (int i = 0; i < input_rows_count; ++i) { const auto* source = reinterpret_cast(&data[offsets[i - 1]]); ColumnString::Offset srclen = offsets[i] - offsets[i - 1]; - auto cipher_len = srclen / 2; - char* dst = nullptr; - if (cipher_len <= stack_buf.size()) { - dst = stack_buf.data(); - } else { - heap_buf.resize(cipher_len); - dst = heap_buf.data(); - } + int cipher_len = srclen / 2; + auto [cipher_inline, dst] = VarBinaryOP::alloc(col_res.get(), i, cipher_len); + int outlen = string_hex::hex_decode(source, srclen, dst); // if empty string or decode failed, may return NULL if (outlen == 0) { null_map->get_data()[i] = 1; - col_res->insert_default(); continue; } - col_res->insert_data(dst, outlen); + VarBinaryOP::check_and_insert_data(col_res->get_data()[i], dst, + cast_set(outlen), cipher_inline); } block.replace_by_position( result, ColumnNullable::create(std::move(col_res), std::move(null_map))); @@ -143,11 +142,124 @@ class FunctionFromBinary : public IFunction { } }; +struct NameVarbinaryLength { + static constexpr auto 
name = "length"; +}; + +struct VarbinaryLengthImpl { + using ReturnType = DataTypeInt32; + using ReturnColumnType = ColumnInt32; + static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; + + static DataTypes get_variadic_argument_types() { + return {std::make_shared()}; + } + + static Status vector(const PaddedPODArray& data, + PaddedPODArray& res) { + size_t rows_count = data.size(); + res.resize(rows_count); + for (size_t i = 0; i < rows_count; ++i) { + res[i] = data[i].size(); + } + return Status::OK(); + } +}; + +using FunctionBinaryLength = FunctionUnaryToType; + +struct ToBase64BinaryImpl { + static constexpr auto name = "to_base64_binary"; + using ReturnType = DataTypeString; + using ColumnType = ColumnString; + static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY; + + static Status vector(const PaddedPODArray& data, + ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { + auto rows_count = data.size(); + dst_offsets.resize(rows_count); + + size_t total_size = 0; + for (size_t i = 0; i < rows_count; i++) { + total_size += 4 * ((data[i].size() + 2) / 3); + } + ColumnString::check_chars_length(total_size, rows_count); + dst_data.resize(total_size); + auto* dst_data_ptr = dst_data.data(); + size_t offset = 0; + + for (size_t i = 0; i < rows_count; i++) { + auto binary = data[i]; + auto binlen = binary.size(); + + if (UNLIKELY(binlen == 0)) { + dst_offsets[i] = cast_set(offset); + continue; + } + + auto outlen = doris::base64_encode( + reinterpret_cast(binary.data()), binlen, + reinterpret_cast(dst_data_ptr + offset)); + + offset += outlen; + dst_offsets[i] = cast_set(offset); + } + + dst_data.pop_back(total_size - offset); + + return Status::OK(); + } +}; + +using FunctionToBase64Binary = FunctionStringEncode; + +struct FromBase64BinaryImpl { + static constexpr auto name = "from_base64_binary"; + using ReturnType = DataTypeVarbinary; + using ColumnType = ColumnVarbinary; + + static Status vector(const 
ColumnString::Chars& data, const ColumnString::Offsets& offsets, + ColumnVarbinary* res, NullMap& null_map) { + auto rows_count = offsets.size(); + res->get_data().assign(rows_count, StringView()); + + for (size_t i = 0; i < rows_count; i++) { + const auto* source = reinterpret_cast(&data[offsets[i - 1]]); + ColumnString::Offset slen = offsets[i] - offsets[i - 1]; + + if (UNLIKELY(slen == 0)) { + continue; + } + + int cipher_len = slen / 4 * 3; + auto [cipher_inline, dst] = VarBinaryOP::alloc(res, i, cipher_len); + + auto outlen = doris::base64_decode(source, slen, dst); + + if (outlen < 0) { + null_map[i] = 1; + } else { + VarBinaryOP::check_and_insert_data(res->get_data()[i], dst, + cast_set(outlen), cipher_inline); + } + } + + return Status::OK(); + } +}; + +using FunctionFromBase64Binary = FunctionStringOperateToNullType; + void register_function_binary(SimpleFunctionFactory& factory) { + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); factory.register_function(); factory.register_function(); factory.register_alias("from_binary", "from_hex"); factory.register_alias("to_binary", "to_hex"); } +#include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_varbinary.h b/be/src/vec/functions/function_varbinary.h new file mode 100644 index 00000000000000..87a7008ae975fa --- /dev/null +++ b/be/src/vec/functions/function_varbinary.h @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/status.h" +#include "vec/columns/column_const.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_varbinary.h" +#include "vec/core/block.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/data_types/data_type_varbinary.h" +#include "vec/functions/function.h" +#include "vec/utils/varbinaryop_subbinary.h" + +namespace doris::vectorized { +#include "common/compile_check_begin.h" + +class FunctionSubBinary : public IFunction { +public: + static constexpr auto name = "sub_binary"; + static FunctionPtr create() { return std::make_shared(); } + String get_name() const override { return name; } + size_t get_number_of_arguments() const override { return 3; } + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + uint32_t result, size_t input_rows_count) const override { + SubBinaryUtil::sub_binary_execute(block, arguments, result, input_rows_count); + return Status::OK(); + } +}; + +#include "common/compile_check_end.h" +} // namespace doris::vectorized diff --git a/be/src/vec/utils/varbinaryop_subbinary.h b/be/src/vec/utils/varbinaryop_subbinary.h new file mode 100644 index 00000000000000..ef4d0b1d20a2ad --- /dev/null +++ b/be/src/vec/utils/varbinaryop_subbinary.h @@ -0,0 +1,119 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more 
contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "vec/columns/column_const.h" +#include "vec/columns/column_varbinary.h" +#include "vec/core/block.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_varbinary.h" + +namespace doris::vectorized { +#include "common/compile_check_begin.h" + +constexpr auto SIZE_OF_UINT = sizeof(uint32_t); + +struct VarBinaryOP { + static void check_and_insert_data(doris::StringView& sView, const char* data, uint32_t len, + bool before_is_inline) { + if (before_is_inline) { + sView.set_size(len); + } else { + sView = doris::StringView(data, len); + } + } + + static std::pair alloc(ColumnVarbinary* res_col, size_t index, uint32_t len) { + bool is_inline = StringView::isInline(len); + char* dst = nullptr; + if (is_inline) { + dst = reinterpret_cast(&(res_col->get_data()[index])) + SIZE_OF_UINT; + } else { + dst = res_col->alloc(len); + } + return {is_inline, dst}; + } +}; + +struct SubBinaryUtil { + static void sub_binary_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, + size_t input_rows_count) { + DCHECK_EQ(arguments.size(), 3); + auto res = ColumnVarbinary::create(); + + bool col_const[3]; + ColumnPtr argument_columns[3]; + for (int i = 0; i < 3; ++i) 
{ + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + } + + const auto* specific_binary_column = + assert_cast(argument_columns[0].get()); + const auto* specific_start_column = + assert_cast(argument_columns[1].get()); + const auto* specific_len_column = + assert_cast(argument_columns[2].get()); + + std::visit( + [&](auto binary_const, auto start_const, auto len_const) { + vectors( + specific_binary_column, specific_start_column, specific_len_column, + res.get(), input_rows_count); + }, + vectorized::make_bool_variant(col_const[0]), + vectorized::make_bool_variant(col_const[1]), + vectorized::make_bool_variant(col_const[2])); + block.get_by_position(result).column = std::move(res); + } + +private: + template + static void vectors(const ColumnVarbinary* binarys, const ColumnInt32* start, + const ColumnInt32* len, ColumnVarbinary* res, size_t size) { + res->get_data().reserve(size); + + for (size_t i = 0; i < size; ++i) { + doris::StringView binary = binarys->get_data()[index_check_const(i)]; + int binary_size = static_cast(binary.size()); + + int start_value = start->get_data()[index_check_const(i)]; + int len_value = len->get_data()[index_check_const(i)]; + + bool start_out_of_range = (start_value > binary_size) || (start_value < -binary_size); + bool len_non_positive = len_value <= 0; + bool input_empty = binary_size == 0; + + if (start_out_of_range || len_non_positive || input_empty) { + res->insert_default(); + continue; + } + int fixed_pos = start_value - 1; + if (fixed_pos < 0) { + fixed_pos = binary_size + fixed_pos + 1; + } + int fixed_len = std::min(binary_size - fixed_pos, len_value); + + res->insert_data(binary.data() + fixed_pos, fixed_len); + } + } +}; + +#include "common/compile_check_end.h" +} // namespace doris::vectorized diff --git a/be/test/vec/function/function_hash_test.cpp b/be/test/vec/function/function_hash_test.cpp index f98430b3c64904..3333fa72e9bd2e 100644 --- 
a/be/test/vec/function/function_hash_test.cpp +++ b/be/test/vec/function/function_hash_test.cpp @@ -29,6 +29,7 @@ #include "vec/data_types/data_type_number.h" namespace doris::vectorized { +using namespace ut_type; TEST(HashFunctionTest, murmur_hash_3_test) { std::string func_name = "murmur_hash3_32"; @@ -130,7 +131,7 @@ TEST(HashFunctionTest, xxhash_32_test) { { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY}; - DataSet data_set = {{{Null()}, Null()}, {{std::string("hello")}, (int32_t)-83855367}}; + DataSet data_set = {{{Null()}, Null()}, {{VARBINARY("hello")}, (int32_t)-83855367}}; static_cast(check_function(func_name, input_types, data_set)); }; @@ -138,8 +139,8 @@ TEST(HashFunctionTest, xxhash_32_test) { { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY}; - DataSet data_set = {{{std::string("hello"), std::string("world")}, (int32_t)-920844969}, - {{std::string("hello"), Null()}, Null()}}; + DataSet data_set = {{{VARBINARY("hello"), VARBINARY("world")}, (int32_t)-920844969}, + {{VARBINARY("hello"), Null()}, Null()}}; static_cast(check_function(func_name, input_types, data_set)); }; @@ -148,9 +149,9 @@ TEST(HashFunctionTest, xxhash_32_test) { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY}; - DataSet data_set = {{{std::string("hello"), std::string("world"), std::string("!")}, - (int32_t)352087701}, - {{std::string("hello"), std::string("world"), Null()}, Null()}}; + DataSet data_set = { + {{VARBINARY("hello"), VARBINARY("world"), VARBINARY("!")}, (int32_t)352087701}, + {{VARBINARY("hello"), VARBINARY("world"), Null()}, Null()}}; static_cast(check_function(func_name, input_types, data_set)); }; @@ -193,7 +194,7 @@ TEST(HashFunctionTest, xxhash_64_test) { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY}; DataSet data_set = {{{Null()}, Null()}, - {{std::string("hello")}, (int64_t)-7685981735718036227}}; + {{VARBINARY("hello")}, 
(int64_t)-7685981735718036227}}; static_cast(check_function(func_name, input_types, data_set)); }; @@ -202,8 +203,8 @@ TEST(HashFunctionTest, xxhash_64_test) { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY}; DataSet data_set = { - {{std::string("hello"), std::string("world")}, (int64_t)7001965798170371843}, - {{std::string("hello"), Null()}, Null()}}; + {{VARBINARY("hello"), VARBINARY("world")}, (int64_t)7001965798170371843}, + {{VARBINARY("hello"), Null()}, Null()}}; static_cast(check_function(func_name, input_types, data_set)); }; @@ -212,9 +213,9 @@ TEST(HashFunctionTest, xxhash_64_test) { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY}; - DataSet data_set = {{{std::string("hello"), std::string("world"), std::string("!")}, + DataSet data_set = {{{VARBINARY("hello"), VARBINARY("world"), VARBINARY("!")}, (int64_t)6796829678999971400}, - {{std::string("hello"), std::string("world"), Null()}, Null()}}; + {{VARBINARY("hello"), VARBINARY("world"), Null()}, Null()}}; static_cast(check_function(func_name, input_types, data_set)); }; diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp index e5c2a044fe6ebd..84272d4475cd84 100644 --- a/be/test/vec/function/function_string_test.cpp +++ b/be/test/vec/function/function_string_test.cpp @@ -1970,27 +1970,27 @@ TEST(function_string_test, function_md5sum_test) { { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY}; DataSet data_set = { - {{std::string("asd你好")}, {std::string("a38c15675555017e6b8ea042f2eb24f5")}}, - {{std::string("hello world")}, {std::string("5eb63bbbe01eeed093cb22bb8f5acdc3")}}, - {{std::string("HELLO,!^%")}, {std::string("b8e6e34d1cc3dc76b784ddfdfb7df800")}}, - {{std::string("")}, {std::string("d41d8cd98f00b204e9800998ecf8427e")}}, - {{std::string(" ")}, {std::string("7215ee9c7d9dc229d2921a40e899ec5f")}}, + {{VARBINARY("asd你好")}, 
{std::string("a38c15675555017e6b8ea042f2eb24f5")}}, + {{VARBINARY("hello world")}, {std::string("5eb63bbbe01eeed093cb22bb8f5acdc3")}}, + {{VARBINARY("HELLO,!^%")}, {std::string("b8e6e34d1cc3dc76b784ddfdfb7df800")}}, + {{VARBINARY("")}, {std::string("d41d8cd98f00b204e9800998ecf8427e")}}, + {{VARBINARY(" ")}, {std::string("7215ee9c7d9dc229d2921a40e899ec5f")}}, {{Null()}, {Null()}}, - {{std::string("MYtestSTR")}, {std::string("cd24c90b3fc1192eb1879093029e87d4")}}, - {{std::string("ò&ø")}, {std::string("fd157b4cb921fa91acc667380184d59c")}}}; + {{VARBINARY("MYtestSTR")}, {std::string("cd24c90b3fc1192eb1879093029e87d4")}}, + {{VARBINARY("ò&ø")}, {std::string("fd157b4cb921fa91acc667380184d59c")}}}; check_function_all_arg_comb(func_name, input_types, data_set); } { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY}; - DataSet data_set = {{{std::string("asd"), std::string("你好")}, + DataSet data_set = {{{VARBINARY("asd"), VARBINARY("你好")}, {std::string("a38c15675555017e6b8ea042f2eb24f5")}}, - {{std::string("hello "), std::string("world")}, + {{VARBINARY("hello "), VARBINARY("world")}, {std::string("5eb63bbbe01eeed093cb22bb8f5acdc3")}}, - {{std::string("HELLO"), std::string(",!^%")}, + {{VARBINARY("HELLO"), VARBINARY(",!^%")}, {std::string("b8e6e34d1cc3dc76b784ddfdfb7df800")}}, - {{Null(), std::string("HELLO")}, {Null()}}}; + {{Null(), VARBINARY("HELLO")}, {Null()}}}; check_function_all_arg_comb(func_name, input_types, data_set); } @@ -1998,13 +1998,13 @@ TEST(function_string_test, function_md5sum_test) { { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY}; - DataSet data_set = {{{std::string("a"), std::string("sd"), std::string("你好")}, + DataSet data_set = {{{VARBINARY("a"), VARBINARY("sd"), VARBINARY("你好")}, {std::string("a38c15675555017e6b8ea042f2eb24f5")}}, - {{std::string(""), std::string(""), std::string("")}, + {{VARBINARY(""), VARBINARY(""), VARBINARY("")}, 
{std::string("d41d8cd98f00b204e9800998ecf8427e")}}, - {{std::string("HEL"), std::string("LO,!"), std::string("^%")}, + {{VARBINARY("HEL"), VARBINARY("LO,!"), VARBINARY("^%")}, {std::string("b8e6e34d1cc3dc76b784ddfdfb7df800")}}, - {{Null(), std::string("HELLO"), Null()}, {Null()}}}; + {{Null(), VARBINARY("HELLO"), Null()}, {Null()}}}; check_function_all_arg_comb(func_name, input_types, data_set); } @@ -2068,20 +2068,20 @@ TEST(function_string_test, function_sm3sum_test) { { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY}; DataSet data_set = { - {{std::string("asd你好")}, + {{VARBINARY("asd你好")}, {std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}}, - {{std::string("hello world")}, + {{VARBINARY("hello world")}, {std::string("44f0061e69fa6fdfc290c494654a05dc0c053da7e5c52b84ef93a9d67d3fff88")}}, - {{std::string("HELLO,!^%")}, + {{VARBINARY("HELLO,!^%")}, {std::string("5fc6e38f40b31a659a59e1daba9b68263615f20c02037b419d9deb3509e6b5c6")}}, - {{std::string("")}, + {{VARBINARY("")}, {std::string("1ab21d8355cfa17f8e61194831e81a8f22bec8c728fefb747ed035eb5082aa2b")}}, - {{std::string(" ")}, + {{VARBINARY(" ")}, {std::string("2ae1d69bb8483e5944310c877573b21d0a420c3bf4a2a91b1a8370d760ba67c5")}}, {{Null()}, {Null()}}, - {{std::string("MYtestSTR")}, + {{VARBINARY("MYtestSTR")}, {std::string("3155ae9f834cae035385fc15b69b6f2c051b91de943ea9a03ab8bfd497aef4c6")}}, - {{std::string("ò&ø")}, + {{VARBINARY("ò&ø")}, {std::string( "aa47ac31c85aa819d4cc80c932e7900fa26a3073a67aa7eb011bc2ba4924a066")}}}; @@ -2091,13 +2091,13 @@ TEST(function_string_test, function_sm3sum_test) { { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY}; DataSet data_set = { - {{std::string("asd"), std::string("你好")}, + {{VARBINARY("asd"), VARBINARY("你好")}, {std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}}, - {{std::string("hello "), std::string("world")}, + {{VARBINARY("hello "), VARBINARY("world")}, 
{std::string("44f0061e69fa6fdfc290c494654a05dc0c053da7e5c52b84ef93a9d67d3fff88")}}, - {{std::string("HELLO "), std::string(",!^%")}, + {{VARBINARY("HELLO "), VARBINARY(",!^%")}, {std::string("1f5866e786ebac9ffed0dbd8f2586e3e99d1d05f7efe7c5915478b57b7423570")}}, - {{Null(), std::string("HELLO")}, {Null()}}}; + {{Null(), VARBINARY("HELLO")}, {Null()}}}; check_function_all_arg_comb(func_name, input_types, data_set); } @@ -2106,13 +2106,13 @@ TEST(function_string_test, function_sm3sum_test) { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_VARBINARY}; DataSet data_set = { - {{std::string("a"), std::string("sd"), std::string("你好")}, + {{VARBINARY("a"), VARBINARY("sd"), VARBINARY("你好")}, {std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}}, - {{std::string(""), std::string(""), std::string("")}, + {{VARBINARY(""), VARBINARY(""), VARBINARY("")}, {std::string("1ab21d8355cfa17f8e61194831e81a8f22bec8c728fefb747ed035eb5082aa2b")}}, - {{std::string("HEL"), std::string("LO,!"), std::string("^%")}, + {{VARBINARY("HEL"), VARBINARY("LO,!"), VARBINARY("^%")}, {std::string("5fc6e38f40b31a659a59e1daba9b68263615f20c02037b419d9deb3509e6b5c6")}}, - {{Null(), std::string("HELLO"), Null()}, {Null()}}}; + {{Null(), VARBINARY("HELLO"), Null()}, {Null()}}}; check_function_all_arg_comb(func_name, input_types, data_set); } @@ -3789,11 +3789,11 @@ TEST(function_string_test, function_sha1_test) { { InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY}; DataSet data_set = { - {{std::string("hello world")}, + {{VARBINARY("hello world")}, {std::string("2aae6c35c94fcfb415dbe95f408b9ce91ee846ed")}}, - {{std::string("doris")}, {std::string("c29bb8e55610dcfecabb065ce5d01be6e3e810e9")}}, - {{std::string("")}, {std::string("da39a3ee5e6b4b0d3255bfef95601890afd80709")}}, - {{std::string("abc")}, {std::string("a9993e364706816aba3e25717850c26c9cd0d89d")}}, + {{VARBINARY("doris")}, 
{std::string("c29bb8e55610dcfecabb065ce5d01be6e3e810e9")}}, + {{VARBINARY("")}, {std::string("da39a3ee5e6b4b0d3255bfef95601890afd80709")}}, + {{VARBINARY("abc")}, {std::string("a9993e364706816aba3e25717850c26c9cd0d89d")}}, {{Null()}, {Null()}}}; check_function_all_arg_comb(func_name, input_types, data_set); diff --git a/be/test/vec/function/function_test_util.cpp b/be/test/vec/function/function_test_util.cpp index ae111c9d10f115..53e8e2d8571800 100644 --- a/be/test/vec/function/function_test_util.cpp +++ b/be/test/vec/function/function_test_util.cpp @@ -382,8 +382,8 @@ bool insert_cell(MutableColumnPtr& column, DataTypePtr type_ptr, const AnyType& break; } case PrimitiveType::TYPE_VARBINARY: { - auto str = any_cast(cell); - column->insert_data(str.c_str(), str.size()); + auto binary = any_cast(cell); + column->insert_data(binary.data(), binary.size()); break; } case PrimitiveType::TYPE_JSONB: { diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 663bc15cd46853..7e2799c9dd5704 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -59,6 +59,7 @@ #include "vec/data_types/data_type_string.h" #include "vec/data_types/data_type_struct.h" #include "vec/data_types/data_type_time.h" +#include "vec/data_types/data_type_varbinary.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized { @@ -103,6 +104,8 @@ using VARCHAR = std::string; using CHAR = std::string; using STRING = std::string; +using VARBINARY = doris::StringView; + using DOUBLE = double; using FLOAT = float; @@ -129,6 +132,11 @@ struct ut_input_type { inline static type default_value = "test_default"; }; template <> +struct ut_input_type { + using type = doris::StringView; + inline static type default_value = doris::StringView("test_default"); +}; +template <> struct ut_input_type { using type = std::string; inline static type default_value = "1970-01-01"; @@ -275,6 +283,10 @@ 
DataTypePtr get_return_type_descriptor(int scale, int precision) { } } +inline std::string debug_hex_string(const std::string& str) { + return ut_type::VARBINARY(str).dump_hex(); +} + struct Consted { PrimitiveType tp; }; @@ -444,11 +456,21 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty << ", expected result: " << result_type_ptr->to_string(*expected_col_ptr, i); } else { auto comp_res = column->compare_at(i, i, *expected_col_ptr, 1); - EXPECT_EQ(0, comp_res) - << ", function " << func_name << ". input row:\n" - << block.dump_data(i, 1) - << "result: " << block.get_data_types()[result]->to_string(*column, i) - << ", expected result: " << result_type_ptr->to_string(*expected_col_ptr, i); + if (std::is_same_v) { + EXPECT_EQ(0, comp_res) + << ", function " << func_name << ". input row:\n" + << block.dump_data(i, 1) << "result: " + << debug_hex_string(block.get_data_types()[result]->to_string(*column, i)) + << ", expected result: " + << debug_hex_string(result_type_ptr->to_string(*expected_col_ptr, i)); + } else { + EXPECT_EQ(0, comp_res) + << ", function " << func_name << ". input row:\n" + << block.dump_data(i, 1) + << "result: " << block.get_data_types()[result]->to_string(*column, i) + << ", expected result: " + << result_type_ptr->to_string(*expected_col_ptr, i); + } } } diff --git a/be/test/vec/function/function_varbinary_test.cpp b/be/test/vec/function/function_varbinary_test.cpp new file mode 100644 index 00000000000000..50baa3c0e51616 --- /dev/null +++ b/be/test/vec/function/function_varbinary_test.cpp @@ -0,0 +1,426 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/functions/function_varbinary.h" + +#include "function_test_util.h" +#include "vec/data_types/data_type_varbinary.h" + +namespace doris::vectorized { + +using namespace ut_type; + +TEST(function_binary_test, function_binary_length_test) { + std::string func_name = "length"; + InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY}; + DataSet data_set = { + {{VARBINARY("YXNk5L2g5aW9")}, std::int32_t(12)}, + {{VARBINARY("aGVsbG8gd29ybGQ")}, std::int32_t(15)}, + {{VARBINARY("SEVMTE8sIV4l")}, std::int32_t(12)}, + {{VARBINARY("__123hehe1")}, std::int32_t(10)}, + {{VARBINARY("")}, std::int32_t(0)}, + {{VARBINARY("5ZWK5ZOI5ZOI5ZOI8J+YhCDjgILigJTigJQh")}, std::int32_t(36)}, + {{VARBINARY("ò&ø")}, std::int32_t(5)}, + {{VARBINARY("TVl0ZXN0U1RS")}, std::int32_t(12)}, + {{VARBINARY("123321!@#@$!@%!@#!@$!@")}, std::int32_t(22)}, + {{VARBINARY("123")}, std::int32_t(3)}, + {{VARBINARY("Hello, World!")}, std::int32_t(13)}, + {{VARBINARY("Привет, мир!")}, std::int32_t(21)}, + {{VARBINARY("こんにちは世界")}, std::int32_t(21)}, + {{VARBINARY("안녕하세요세계")}, std::int32_t(21)}, + {{VARBINARY("你好,世界!")}, std::int32_t(18)}, + {{VARBINARY("مرحبا بالعالم!")}, std::int32_t(26)}, + {{VARBINARY("1234567890")}, std::int32_t(10)}, + {{VARBINARY("👨‍👨‍👧‍👦")}, std::int32_t(25)}, + {{VARBINARY("🇺🇸🇨🇳🇯🇵🇰🇷")}, std::int32_t(32)}, + {{VARBINARY("\u00F1")}, std::int32_t(2)}, + {{VARBINARY("\u65E5\u672C\u8A9E")}, std::int32_t(9)}, + {{VARBINARY("Hello, 世界!")}, std::int32_t(16)}, + {{VARBINARY("😀😃😄😁")}, std::int32_t(16)}, + {{VARBINARY("Quick brown 狐 jumps over a lazy 狗.")}, 
std::int32_t(38)}, + {{VARBINARY("Löwe 老虎 Léopard")}, std::int32_t(21)}, + {{VARBINARY("Café 美丽")}, std::int32_t(12)}, + {{VARBINARY("Björk")}, std::int32_t(6)}, + {{VARBINARY("¿Dónde está la biblioteca?")}, std::int32_t(29)}, + {{VARBINARY("Zażółć gęślą jaźń")}, std::int32_t(26)}, + {{Null()}, Null()}, + {{VARBINARY(" ")}, std::int32_t(1)}, + {{VARBINARY(" ")}, std::int32_t(2)}, + + }; + + check_function_all_arg_comb(func_name, input_types, data_set); +} + +TEST(function_binary_test, function_to_base64_binary_test) { + std::string func_name = "to_base64_binary"; + InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY}; + + DataSet data_set = { + {{VARBINARY("ABC")}, std::string("QUJD")}, + {{VARBINARY("ABB")}, std::string("QUJC")}, + {{VARBINARY("HEHE")}, std::string("SEVIRQ==")}, + {{VARBINARY("__123hehe1")}, std::string("X18xMjNoZWhlMQ==")}, + {{VARBINARY("5ZWK5ZOI5ZOI5ZOI8J+YhCDjgILigJTigJQh")}, + std::string("NVpXSzVaT0k1Wk9JNVpPSThKK1loQ0RqZ0lMaWdKVGlnSlFo")}, + {{VARBINARY("ò&ø")}, std::string("w7Imw7g=")}, + {{VARBINARY("hehe")}, std::string("aGVoZQ==")}, + {{VARBINARY("`~!@#$%^&*()-_=+")}, std::string("YH4hQCMkJV4mKigpLV89Kw==")}, + {{VARBINARY("test ")}, std::string("dGVzdCA=")}, + {{VARBINARY("")}, std::string("")}, + {{Null()}, Null()}, + }; + + check_function_all_arg_comb(func_name, input_types, data_set); +} + +TEST(function_binary_test, function_from_base64_binary_test) { + std::string func_name = "from_base64_binary"; + InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR}; + + DataSet data_set = { + {{std::string("YXNk5L2g5aW9")}, VARBINARY("asd你好")}, + {{std::string("aGVsbG8gd29ybGQ")}, Null()}, + {{std::string("SEVMTE8sIV4l")}, VARBINARY("HELLO,!^%")}, + {{std::string("__123hehe1")}, Null()}, + {{std::string("")}, VARBINARY("")}, + {{std::string("5ZWK5ZOI5ZOI5ZOI8J+YhCDjgILigJTigJQh")}, VARBINARY("啊哈哈哈😄 。——!")}, + {{std::string("ò&ø")}, Null()}, + {{std::string("TVl0ZXN0U1RS")}, VARBINARY("MYtestSTR")}, + 
{{std::string("YWFhYWFhYWFhYWE=")}, VARBINARY("aaaaaaaaaaa")}, + {{std::string("TVl0ZXN0U1RSTVl0ZXN0U1RSTVl0ZXN0U1RS")}, + VARBINARY("MYtestSTRMYtestSTRMYtestSTR")}, + {{std::string("SEVMTE8sIV4lSEVMTE8sIV4lSEVMTE8sIV4lSEVMTE8sIV4l")}, + VARBINARY("HELLO,!^%HELLO,!^%HELLO,!^%HELLO,!^%")}, + {{Null()}, Null()}, + }; + + check_function_all_arg_comb(func_name, input_types, data_set); +} + +TEST(function_binary_test, function_subbinary_test) { + std::string func_name = "sub_binary"; + + { + InputTypeSet input_types = {PrimitiveType::TYPE_VARBINARY, PrimitiveType::TYPE_INT, + PrimitiveType::TYPE_INT}; + + DataSet data_set = { + {{VARBINARY("AbCdEfg"), std::int32_t(1), std::int32_t(1)}, VARBINARY("A")}, + {{VARBINARY("AbCdEfg"), std::int32_t(1), std::int32_t(5)}, VARBINARY("AbCdE")}, + {{VARBINARY("AbCdEfg"), std::int32_t(1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("AbCdEfg"), std::int32_t(1), std::int32_t(100)}, VARBINARY("AbCdEfg")}, + {{VARBINARY("AbCdEfg"), std::int32_t(1), Null()}, Null()}, + {{VARBINARY("AbCdEfg"), std::int32_t(5), std::int32_t(1)}, VARBINARY("E")}, + {{VARBINARY("AbCdEfg"), std::int32_t(5), std::int32_t(5)}, VARBINARY("Efg")}, + {{VARBINARY("AbCdEfg"), std::int32_t(5), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("AbCdEfg"), std::int32_t(5), std::int32_t(100)}, VARBINARY("Efg")}, + {{VARBINARY("AbCdEfg"), std::int32_t(5), Null()}, Null()}, + {{VARBINARY("AbCdEfg"), std::int32_t(-1), std::int32_t(1)}, VARBINARY("g")}, + {{VARBINARY("AbCdEfg"), std::int32_t(-1), std::int32_t(5)}, VARBINARY("g")}, + {{VARBINARY("AbCdEfg"), std::int32_t(-1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("AbCdEfg"), std::int32_t(-1), std::int32_t(100)}, VARBINARY("g")}, + {{VARBINARY("AbCdEfg"), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY("AbCdEfg"), std::int32_t(100), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY("AbCdEfg"), std::int32_t(100), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY("AbCdEfg"), std::int32_t(100), std::int32_t(-1)}, 
VARBINARY("")}, + {{VARBINARY("AbCdEfg"), std::int32_t(100), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY("AbCdEfg"), std::int32_t(100), Null()}, Null()}, + {{VARBINARY("AbCdEfg"), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY("AbCdEfg"), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY("AbCdEfg"), Null(), std::int32_t(-1)}, Null()}, + {{VARBINARY("AbCdEfg"), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY("AbCdEfg"), Null(), Null()}, Null()}, + {{VARBINARY("HELLO123"), std::int32_t(1), std::int32_t(1)}, VARBINARY("H")}, + {{VARBINARY("HELLO123"), std::int32_t(1), std::int32_t(5)}, VARBINARY("HELLO")}, + {{VARBINARY("HELLO123"), std::int32_t(1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("HELLO123"), std::int32_t(1), std::int32_t(100)}, + VARBINARY("HELLO123")}, + {{VARBINARY("HELLO123"), std::int32_t(1), Null()}, Null()}, + {{VARBINARY("HELLO123"), std::int32_t(5), std::int32_t(1)}, VARBINARY("O")}, + {{VARBINARY("HELLO123"), std::int32_t(5), std::int32_t(5)}, VARBINARY("O123")}, + {{VARBINARY("HELLO123"), std::int32_t(5), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("HELLO123"), std::int32_t(5), std::int32_t(100)}, VARBINARY("O123")}, + {{VARBINARY("HELLO123"), std::int32_t(5), Null()}, Null()}, + {{VARBINARY("HELLO123"), std::int32_t(-1), std::int32_t(1)}, VARBINARY("3")}, + {{VARBINARY("HELLO123"), std::int32_t(-1), std::int32_t(5)}, VARBINARY("3")}, + {{VARBINARY("HELLO123"), std::int32_t(-1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("HELLO123"), std::int32_t(-1), std::int32_t(100)}, VARBINARY("3")}, + {{VARBINARY("HELLO123"), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY("HELLO123"), std::int32_t(100), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY("HELLO123"), std::int32_t(100), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY("HELLO123"), std::int32_t(100), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("HELLO123"), std::int32_t(100), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY("HELLO123"), std::int32_t(100), 
Null()}, Null()}, + {{VARBINARY("HELLO123"), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY("HELLO123"), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY("HELLO123"), Null(), std::int32_t(-1)}, Null()}, + {{VARBINARY("HELLO123"), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY("HELLO123"), Null(), Null()}, Null()}, + // VARBINARY("\xE4\xBD\xA0\xE5\xA5\xBD\x48\x45\x4C\x4C\x4F") == VARBINARY("你好HELLO") + {{VARBINARY("你好HELLO"), std::int32_t(1), std::int32_t(1)}, VARBINARY("\xE4")}, + {{VARBINARY("你好HELLO"), std::int32_t(1), std::int32_t(5)}, + VARBINARY("\xE4\xBD\xA0\xE5\xA5")}, + {{VARBINARY("你好HELLO"), std::int32_t(1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("你好HELLO"), std::int32_t(1), std::int32_t(100)}, + VARBINARY("你好HELLO")}, + {{VARBINARY("你好HELLO"), std::int32_t(1), Null()}, Null()}, + {{VARBINARY("你好HELLO"), std::int32_t(5), std::int32_t(1)}, VARBINARY("\xA5")}, + {{VARBINARY("你好HELLO"), std::int32_t(5), std::int32_t(5)}, + VARBINARY("\xA5\xBD\x48\x45\x4C")}, + {{VARBINARY("你好HELLO"), std::int32_t(5), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("你好HELLO"), std::int32_t(5), std::int32_t(100)}, + VARBINARY("\xA5\xBDHELLO")}, + {{VARBINARY("你好HELLO"), std::int32_t(5), Null()}, Null()}, + {{VARBINARY("你好HELLO"), std::int32_t(-1), std::int32_t(1)}, VARBINARY("O")}, + {{VARBINARY("你好HELLO"), std::int32_t(-1), std::int32_t(5)}, VARBINARY("O")}, + {{VARBINARY("你好HELLO"), std::int32_t(-1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("你好HELLO"), std::int32_t(-1), std::int32_t(100)}, VARBINARY("O")}, + {{VARBINARY("你好HELLO"), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY("你好HELLO"), std::int32_t(100), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY("你好HELLO"), std::int32_t(100), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY("你好HELLO"), std::int32_t(100), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("你好HELLO"), std::int32_t(100), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY("你好HELLO"), std::int32_t(100), Null()}, Null()}, + 
{{VARBINARY("你好HELLO"), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY("你好HELLO"), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY("你好HELLO"), Null(), std::int32_t(-1)}, Null()}, + {{VARBINARY("你好HELLO"), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY("你好HELLO"), Null(), Null()}, Null()}, + {{VARBINARY("123ABC_"), std::int32_t(1), std::int32_t(1)}, VARBINARY("1")}, + {{VARBINARY("123ABC_"), std::int32_t(1), std::int32_t(5)}, VARBINARY("123AB")}, + {{VARBINARY("123ABC_"), std::int32_t(1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("123ABC_"), std::int32_t(1), std::int32_t(100)}, VARBINARY("123ABC_")}, + {{VARBINARY("123ABC_"), std::int32_t(1), Null()}, Null()}, + {{VARBINARY("123ABC_"), std::int32_t(5), std::int32_t(1)}, VARBINARY("B")}, + {{VARBINARY("123ABC_"), std::int32_t(5), std::int32_t(5)}, VARBINARY("BC_")}, + {{VARBINARY("123ABC_"), std::int32_t(5), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("123ABC_"), std::int32_t(5), std::int32_t(100)}, VARBINARY("BC_")}, + {{VARBINARY("123ABC_"), std::int32_t(5), Null()}, Null()}, + {{VARBINARY("123ABC_"), std::int32_t(-1), std::int32_t(1)}, VARBINARY("_")}, + {{VARBINARY("123ABC_"), std::int32_t(-1), std::int32_t(5)}, VARBINARY("_")}, + {{VARBINARY("123ABC_"), std::int32_t(-1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("123ABC_"), std::int32_t(-1), std::int32_t(100)}, VARBINARY("_")}, + {{VARBINARY("123ABC_"), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY("123ABC_"), std::int32_t(100), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY("123ABC_"), std::int32_t(100), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY("123ABC_"), std::int32_t(100), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("123ABC_"), std::int32_t(100), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY("123ABC_"), std::int32_t(100), Null()}, Null()}, + {{VARBINARY("123ABC_"), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY("123ABC_"), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY("123ABC_"), Null(), std::int32_t(-1)}, 
Null()}, + {{VARBINARY("123ABC_"), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY("123ABC_"), Null(), Null()}, Null()}, + {{VARBINARY("MYtestSTR"), std::int32_t(1), std::int32_t(1)}, VARBINARY("M")}, + {{VARBINARY("MYtestSTR"), std::int32_t(1), std::int32_t(5)}, VARBINARY("MYtes")}, + {{VARBINARY("MYtestSTR"), std::int32_t(1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("MYtestSTR"), std::int32_t(1), std::int32_t(100)}, + VARBINARY("MYtestSTR")}, + {{VARBINARY("MYtestSTR"), std::int32_t(1), Null()}, Null()}, + {{VARBINARY("MYtestSTR"), std::int32_t(5), std::int32_t(1)}, VARBINARY("s")}, + {{VARBINARY("MYtestSTR"), std::int32_t(5), std::int32_t(5)}, VARBINARY("stSTR")}, + {{VARBINARY("MYtestSTR"), std::int32_t(5), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("MYtestSTR"), std::int32_t(5), std::int32_t(100)}, VARBINARY("stSTR")}, + {{VARBINARY("MYtestSTR"), std::int32_t(5), Null()}, Null()}, + {{VARBINARY("MYtestSTR"), std::int32_t(-1), std::int32_t(1)}, VARBINARY("R")}, + {{VARBINARY("MYtestSTR"), std::int32_t(-1), std::int32_t(5)}, VARBINARY("R")}, + {{VARBINARY("MYtestSTR"), std::int32_t(-1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("MYtestSTR"), std::int32_t(-1), std::int32_t(100)}, VARBINARY("R")}, + {{VARBINARY("MYtestSTR"), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY("MYtestSTR"), std::int32_t(100), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY("MYtestSTR"), std::int32_t(100), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY("MYtestSTR"), std::int32_t(100), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("MYtestSTR"), std::int32_t(100), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY("MYtestSTR"), std::int32_t(100), Null()}, Null()}, + {{VARBINARY("MYtestSTR"), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY("MYtestSTR"), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY("MYtestSTR"), Null(), std::int32_t(-1)}, Null()}, + {{VARBINARY("MYtestSTR"), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY("MYtestSTR"), Null(), Null()}, 
Null()}, + {{VARBINARY(""), std::int32_t(1), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(1), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(1), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(1), Null()}, Null()}, + {{VARBINARY(""), std::int32_t(5), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(5), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(5), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(5), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(5), Null()}, Null()}, + {{VARBINARY(""), std::int32_t(-1), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(-1), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(-1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(-1), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY(""), std::int32_t(100), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(100), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(100), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(100), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY(""), std::int32_t(100), Null()}, Null()}, + {{VARBINARY(""), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY(""), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY(""), Null(), std::int32_t(-1)}, Null()}, + {{VARBINARY(""), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY(""), Null(), Null()}, Null()}, + {{Null(), std::int32_t(1), std::int32_t(1)}, Null()}, + {{Null(), std::int32_t(1), std::int32_t(5)}, Null()}, + {{Null(), std::int32_t(1), std::int32_t(-1)}, Null()}, + {{Null(), std::int32_t(1), std::int32_t(100)}, Null()}, + {{Null(), std::int32_t(1), Null()}, Null()}, + {{Null(), std::int32_t(5), std::int32_t(1)}, Null()}, + {{Null(), std::int32_t(5), 
std::int32_t(5)}, Null()}, + {{Null(), std::int32_t(5), std::int32_t(-1)}, Null()}, + {{Null(), std::int32_t(5), std::int32_t(100)}, Null()}, + {{Null(), std::int32_t(5), Null()}, Null()}, + {{Null(), std::int32_t(-1), std::int32_t(1)}, Null()}, + {{Null(), std::int32_t(-1), std::int32_t(5)}, Null()}, + {{Null(), std::int32_t(-1), std::int32_t(-1)}, Null()}, + {{Null(), std::int32_t(-1), std::int32_t(100)}, Null()}, + {{Null(), std::int32_t(-1), Null()}, Null()}, + {{Null(), std::int32_t(100), std::int32_t(1)}, Null()}, + {{Null(), std::int32_t(100), std::int32_t(5)}, Null()}, + {{Null(), std::int32_t(100), std::int32_t(-1)}, Null()}, + {{Null(), std::int32_t(100), std::int32_t(100)}, Null()}, + {{Null(), std::int32_t(100), Null()}, Null()}, + {{Null(), Null(), std::int32_t(1)}, Null()}, + {{Null(), Null(), std::int32_t(5)}, Null()}, + {{Null(), Null(), std::int32_t(-1)}, Null()}, + {{Null(), Null(), std::int32_t(100)}, Null()}, + {{Null(), Null(), Null()}, Null()}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(1), std::int32_t(1)}, VARBINARY("A")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(1), std::int32_t(5)}, VARBINARY("A,b,C")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(1), std::int32_t(100)}, + VARBINARY("A,b,C,D,_E")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(1), Null()}, Null()}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(5), std::int32_t(1)}, VARBINARY("C")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(5), std::int32_t(5)}, VARBINARY("C,D,_")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(5), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(5), std::int32_t(100)}, + VARBINARY("C,D,_E")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(5), Null()}, Null()}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(-1), std::int32_t(1)}, VARBINARY("E")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(-1), std::int32_t(5)}, VARBINARY("E")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(-1), 
std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(-1), std::int32_t(100)}, VARBINARY("E")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(100), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(100), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(100), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(100), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY("A,b,C,D,_E"), std::int32_t(100), Null()}, Null()}, + {{VARBINARY("A,b,C,D,_E"), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY("A,b,C,D,_E"), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY("A,b,C,D,_E"), Null(), std::int32_t(-1)}, Null()}, + {{VARBINARY("A,b,C,D,_E"), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY("A,b,C,D,_E"), Null(), Null()}, Null()}, + {{VARBINARY("1234321312312"), std::int32_t(1), std::int32_t(1)}, VARBINARY("1")}, + {{VARBINARY("1234321312312"), std::int32_t(1), std::int32_t(5)}, + VARBINARY("12343")}, + {{VARBINARY("1234321312312"), std::int32_t(1), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("1234321312312"), std::int32_t(1), std::int32_t(100)}, + VARBINARY("1234321312312")}, + {{VARBINARY("1234321312312"), std::int32_t(1), Null()}, Null()}, + {{VARBINARY("1234321312312"), std::int32_t(5), std::int32_t(1)}, VARBINARY("3")}, + {{VARBINARY("1234321312312"), std::int32_t(5), std::int32_t(5)}, + VARBINARY("32131")}, + {{VARBINARY("1234321312312"), std::int32_t(5), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("1234321312312"), std::int32_t(5), std::int32_t(100)}, + VARBINARY("321312312")}, + {{VARBINARY("1234321312312"), std::int32_t(5), Null()}, Null()}, + {{VARBINARY("1234321312312"), std::int32_t(-1), std::int32_t(1)}, VARBINARY("2")}, + {{VARBINARY("1234321312312"), std::int32_t(-1), std::int32_t(5)}, VARBINARY("2")}, + {{VARBINARY("1234321312312"), std::int32_t(-1), std::int32_t(-1)}, VARBINARY("")}, + 
{{VARBINARY("1234321312312"), std::int32_t(-1), std::int32_t(100)}, VARBINARY("2")}, + {{VARBINARY("1234321312312"), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY("1234321312312"), std::int32_t(100), std::int32_t(1)}, VARBINARY("")}, + {{VARBINARY("1234321312312"), std::int32_t(100), std::int32_t(5)}, VARBINARY("")}, + {{VARBINARY("1234321312312"), std::int32_t(100), std::int32_t(-1)}, VARBINARY("")}, + {{VARBINARY("1234321312312"), std::int32_t(100), std::int32_t(100)}, VARBINARY("")}, + {{VARBINARY("1234321312312"), std::int32_t(100), Null()}, Null()}, + {{VARBINARY("1234321312312"), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY("1234321312312"), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY("1234321312312"), Null(), std::int32_t(-1)}, Null()}, + {{VARBINARY("1234321312312"), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY("1234321312312"), Null(), Null()}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(1), std::int32_t(1)}, + VARBINARY("h")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(1), std::int32_t(5)}, + VARBINARY("heh1h")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(1), std::int32_t(-1)}, + VARBINARY("")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(1), std::int32_t(100)}, + VARBINARY("heh1h2_!u@_u@i$o%ll_")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(1), Null()}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(5), std::int32_t(1)}, + VARBINARY("h")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(5), std::int32_t(5)}, + VARBINARY("h2_!u")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(5), std::int32_t(-1)}, + VARBINARY("")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(5), std::int32_t(100)}, + VARBINARY("h2_!u@_u@i$o%ll_")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(5), Null()}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(-1), std::int32_t(1)}, + VARBINARY("_")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(-1), 
std::int32_t(5)}, + VARBINARY("_")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(-1), std::int32_t(-1)}, + VARBINARY("")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(-1), std::int32_t(100)}, + VARBINARY("_")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(-1), Null()}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(100), std::int32_t(1)}, + VARBINARY("")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(100), std::int32_t(5)}, + VARBINARY("")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(100), std::int32_t(-1)}, + VARBINARY("")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(100), std::int32_t(100)}, + VARBINARY("")}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), std::int32_t(100), Null()}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), Null(), std::int32_t(1)}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), Null(), std::int32_t(5)}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), Null(), std::int32_t(-1)}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), Null(), std::int32_t(100)}, Null()}, + {{VARBINARY("heh1h2_!u@_u@i$o%ll_"), Null(), Null()}, Null()}, + }; + + check_function_all_arg_comb(func_name, input_types, data_set); + } +} + +TEST(function_binary_test, function_to_binary_test) { + std::string func_name = "to_binary"; + InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR}; + + DataSet data_set = { + {{std::string("48656c6c6f")}, VARBINARY("Hello")}, + {{std::string("0001")}, VARBINARY(std::string_view("\x00\x01", 2))}, + {{std::string("aaff")}, VARBINARY("\xAA\xFF")}, + {{std::string("aGVsbG8gd29ybGQ")}, Null()}, + {{std::string("a")}, Null()}, + {{std::string("__123hehe1")}, Null()}, + {{std::string("")}, Null()}, + {{Null()}, Null()}, + }; + + check_function_all_arg_comb(func_name, input_types, data_set); +} + +} // namespace doris::vectorized diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index d69322818a28cc..3074cd112dc182 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -210,6 +210,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.FormatRound; import org.apache.doris.nereids.trees.expressions.functions.scalar.Fpow; import org.apache.doris.nereids.trees.expressions.functions.scalar.FromBase64; +import org.apache.doris.nereids.trees.expressions.functions.scalar.FromBase64Binary; import org.apache.doris.nereids.trees.expressions.functions.scalar.FromBinary; import org.apache.doris.nereids.trees.expressions.functions.scalar.FromDays; import org.apache.doris.nereids.trees.expressions.functions.scalar.FromIso8601Date; @@ -463,6 +464,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToMap; import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp; import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement; +import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBinary; import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap; import org.apache.doris.nereids.trees.expressions.functions.scalar.SubReplace; import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring; @@ -474,6 +476,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.TimeToSec; import org.apache.doris.nereids.trees.expressions.functions.scalar.Timestamp; import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBase64; +import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBase64Binary; import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBinary; import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBitmap; import 
org.apache.doris.nereids.trees.expressions.functions.scalar.ToBitmapWithCheck; @@ -719,6 +722,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(FormatNumber.class, "format_number"), scalar(Fpow.class, "fpow"), scalar(FromBase64.class, "from_base64"), + scalar(FromBase64Binary.class, "from_base64_binary"), scalar(FromBinary.class, "from_binary", "from_hex"), scalar(FromDays.class, "from_days"), scalar(FromIso8601Date.class, "from_iso8601_date"), @@ -979,6 +983,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(Strcmp.class, "strcmp"), scalar(StrToDate.class, "str_to_date"), scalar(StrToMap.class, "str_to_map"), + scalar(SubBinary.class, "sub_binary"), scalar(SubBitmap.class, "sub_bitmap"), scalar(SubReplace.class, "sub_replace"), scalar(Substring.class, "substr", "substring", "mid"), @@ -990,6 +995,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(TimeToSec.class, "time_to_sec"), scalar(Timestamp.class, "timestamp"), scalar(ToBase64.class, "to_base64"), + scalar(ToBase64Binary.class, "to_base64_binary"), scalar(ToBinary.class, "to_binary", "to_hex"), scalar(ToBitmap.class, "to_bitmap"), scalar(ToBitmapWithCheck.class, "to_bitmap_with_check"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FromBase64Binary.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FromBase64Binary.java new file mode 100644 index 00000000000000..81b858b63f8973 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FromBase64Binary.java @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral; +import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarBinaryType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'from_base64_binary'. + */ +public class FromBase64Binary extends ScalarFunction + implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable, PropagateNullLiteral { + public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(VarBinaryType.INSTANCE).args(StringType.INSTANCE)); + + /** + * constructor with 1 argument. 
+ */ + public FromBase64Binary(Expression arg) { + super("from_base64_binary", arg); + } + + /** constructor for withChildren and reuse signature */ + private FromBase64Binary(ScalarFunctionParams functionParams) { + super(functionParams); + } + + /** + * withChildren. + */ + @Override + public FromBase64Binary withChildren(List children) { + Preconditions.checkArgument(children.size() == 1); + return new FromBase64Binary(getFunctionParams(children)); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitFromBase64Binary(this, context); + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Length.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Length.java index 6bb8bc9c3dd5ca..35fc1b49aef19e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Length.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Length.java @@ -25,6 +25,7 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarBinaryType; import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; @@ -40,8 +41,8 @@ public class Length extends ScalarFunction public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(IntegerType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT), - FunctionSignature.ret(IntegerType.INSTANCE).args(StringType.INSTANCE) - ); + FunctionSignature.ret(IntegerType.INSTANCE).args(StringType.INSTANCE), + FunctionSignature.ret(IntegerType.INSTANCE).args(VarBinaryType.INSTANCE)); /** * constructor with 1 argument. 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubBinary.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubBinary.java
new file mode 100644
index 00000000000000..5fc67aca98ed74
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SubBinary.java
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.VarBinaryType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * ScalarFunction 'sub_binary': (VARBINARY source, INT position[, INT length]) -> VARBINARY.
+ */
+public class SubBinary extends ScalarFunction
+        implements ExplicitlyCastableSignature, PropagateNullable {
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(VarBinaryType.INSTANCE).args(VarBinaryType.INSTANCE, IntegerType.INSTANCE),
+            FunctionSignature.ret(VarBinaryType.INSTANCE)
+                    .args(VarBinaryType.INSTANCE, IntegerType.INSTANCE, IntegerType.INSTANCE));
+
+    /**
+     * constructor with 2 arguments; a literal Integer.MAX_VALUE is passed as the length argument.
+     */
+    public SubBinary(Expression arg0, Expression arg1) {
+        super("sub_binary", arg0, arg1, Literal.of(Integer.MAX_VALUE));
+    }
+
+    /**
+     * constructor with 3 arguments.
+     */
+    public SubBinary(Expression arg0, Expression arg1, Expression arg2) {
+        super("sub_binary", arg0, arg1, arg2);
+    }
+
+    /** constructor for withChildren and reuse signature */
+    private SubBinary(ScalarFunctionParams functionParams) {
+        super(functionParams);
+    }
+
+    public Expression getSource() {
+        return child(0);
+    }
+
+    public Expression getPosition() {
+        return child(1);
+    }
+
+    public Optional<Expression> getLength() {
+        return arity() == 3 ? Optional.of(child(2)) : Optional.empty();
+    }
+
+    /**
+     * withChildren: accepts either 2 or 3 children.
+     */
+    @Override
+    public SubBinary withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 2 || children.size() == 3);
+        return new SubBinary(getFunctionParams(children));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitSubBinary(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBase64Binary.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBase64Binary.java
new file mode 100644
index 00000000000000..fe07fecff0c452
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ToBase64Binary.java
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarBinaryType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'to_base64_binary': takes VARBINARY, returns STRING.
+ */
+public class ToBase64Binary extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(StringType.INSTANCE).args(VarBinaryType.INSTANCE));
+
+    /**
+     * constructor with 1 argument.
+     */
+    public ToBase64Binary(Expression arg) {
+        super("to_base64_binary", arg);
+    }
+
+    /** constructor for withChildren and reuse signature */
+    private ToBase64Binary(ScalarFunctionParams functionParams) {
+        super(functionParams);
+    }
+
+    /**
+     * withChildren: rebuilds this function over exactly one child.
+     */
+    @Override
+    public ToBase64Binary withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new ToBase64Binary(getFunctionParams(children));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitToBase64Binary(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index fbfa0d5bce2a82..8ce00fc6f1f6a0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -220,6 +220,7 @@
 import org.apache.doris.nereids.trees.expressions.functions.scalar.FormatRound;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Fpow;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.FromBase64;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.FromBase64Binary;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.FromBinary;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.FromDays;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.FromIso8601Date;
@@ -463,6 +464,7 @@
 import org.apache.doris.nereids.trees.expressions.functions.scalar.StrToMap;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Strcmp;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.StructElement;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBinary;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.SubBitmap;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.SubReplace;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Substring;
@@ -473,6 +475,7 @@
 import org.apache.doris.nereids.trees.expressions.functions.scalar.TimeDiff;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Timestamp;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBase64;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBase64Binary;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBinary;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBitmap;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBitmapWithCheck;
@@ -1313,6 +1316,10 @@ default R visitFromBase64(FromBase64 fromBase64, C context) {
         return visitScalarFunction(fromBase64, context);
     }
 
+    default R visitFromBase64Binary(FromBase64Binary fromBase64Binary, C context) {
+        return visitScalarFunction(fromBase64Binary, context);
+    }
+
     default R visitFromBinary(FromBinary fromBinary, C context) {
         return visitScalarFunction(fromBinary, context);
     }
@@ -2205,6 +2212,10 @@ default R visitStringRegexPredicate(StringRegexPredicate stringRegexPredicate, C
         return visitScalarFunction(stringRegexPredicate, context);
     }
 
+    default R visitSubBinary(SubBinary subBinary, C context) {
+        return visitScalarFunction(subBinary, context);
+    }
+
     default R visitSubBitmap(SubBitmap subBitmap, C context) {
         return visitScalarFunction(subBitmap, context);
     }
@@ -2257,6 +2268,10 @@ default R visitToBase64(ToBase64 toBase64, C context) {
         return visitScalarFunction(toBase64, context);
     }
 
+    default R visitToBase64Binary(ToBase64Binary toBase64Binary, C context) {
+        return visitScalarFunction(toBase64Binary, context);
+    }
+
     default R visitToBinary(ToBinary toBinary, C context) {
         return visitScalarFunction(toBinary, context);
     }
diff --git a/regression-test/suites/query_p0/sql_functions/binary_functions/test_binary_function.groovy b/regression-test/suites/query_p0/sql_functions/binary_functions/test_binary_function.groovy
new file mode 100644
index 00000000000000..0959adefea10cc
--- /dev/null
+++ b/regression-test/suites/query_p0/sql_functions/binary_functions/test_binary_function.groovy
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Checks the VARBINARY functions (length, from_base64_binary, to_base64_binary, sub_binary)
+// against their VARCHAR counterparts on a MySQL table read through a JDBC catalog.
+suite("test_binary_function", "p0,external,mysql,external_docker,external_docker_mysql") {
+    String enabled = context.config.otherConfigs.get("enableJdbcTest")
+
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String s3_endpoint = getS3Endpoint()
+        String bucket = getS3BucketName()
+        String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/mysql-connector-java-8.0.25.jar"
+        String catalog_name = "mysql_varbinary_catalog";
+        String ex_db_name = "doris_test";
+        String mysql_port = context.config.otherConfigs.get("mysql_57_port");
+        String test_table = "binary_test";
+        // Must equal the number of rows inserted below.
+        int expected_rows = 7
+
+        sql """drop catalog if exists ${catalog_name}"""
+
+        sql """create catalog if not exists ${catalog_name} properties(
+            "type"="jdbc",
+            "user"="root",
+            "password"="123456",
+            "jdbc_url" = "jdbc:mysql://${externalEnvIp}:${mysql_port}/doris_test?useSSL=false",
+            "driver_url" = "${driver_url}",
+            "driver_class" = "com.mysql.cj.jdbc.Driver"
+        );"""
+
+        // Create and fill the source table directly on the MySQL side.
+        connect("root", "123456", "jdbc:mysql://${externalEnvIp}:${mysql_port}/doris_test?useSSL=false") {
+            try_sql """DROP TABLE IF EXISTS ${test_table}"""
+
+            sql """CREATE TABLE ${test_table} (
+                id int,
+                vb varbinary(100),
+                vc VARCHAR(100)
+            )"""
+
+            sql """INSERT INTO ${test_table} VALUES
+                (1, 'hello world', 'hello world'),
+                (2, '', ''),
+                (3, 'special chars: !@#%', 'special chars: !@#%'),
+                (4, '__123hehe1', '__123hehe1'),
+                (5, 'ABB', 'ABB'),
+                (6, '5ZWK5ZOI5ZOI5ZOI8J+YhCDjgILigJTigJQh', '5ZWK5ZOI5ZOI5ZOI8J+YhCDjgILigJTigJQh'),
+                (7, 'SEVMTE8sIV4l', 'SEVMTE8sIV4l')
+            """
+        }
+
+        sql """switch ${catalog_name}"""
+        sql """use ${ex_db_name}"""
+
+        // Runs a query shaped (id, VARBINARY-side value, VARCHAR-side value) and asserts the two
+        // value columns agree on every row. The row-count check keeps the loop from passing
+        // vacuously when the query returns nothing.
+        def check_columns_match = { label, query ->
+            def rows = sql(query)
+            assertTrue(rows.size() == expected_rows,
+                    "${label}: expected ${expected_rows} rows but got ${rows.size()}")
+            for (def row : rows) {
+                assertTrue(row[1] == row[2],
+                        "${label} mismatch for row ${row[0]}: VarBinary=${row[1]}, VARCHAR=${row[2]}")
+            }
+        }
+
+        check_columns_match("length",
+                """select id, length(vb), length(vc) from ${test_table} order by id""")
+        check_columns_match("from_base64",
+                """select id, from_binary(from_base64_binary(vc)), hex(from_base64(vc)) from ${test_table} order by id""")
+        check_columns_match("to_base64",
+                """select id, to_base64_binary(vb), to_base64(vc) from ${test_table} order by id""")
+        check_columns_match("sub_binary_3args",
+                """select id, from_binary(sub_binary(vb, 1, 5)), hex(substr(vc, 1, 5)) from ${test_table} order by id""")
+        check_columns_match("sub_binary_3args_2",
+                """select id, from_binary(sub_binary(vb, -1, 5)), hex(substr(vc, -1, 5)) from ${test_table} order by id""")
+        check_columns_match("sub_binary_2args",
+                """select id, from_binary(sub_binary(vb, 1)), hex(substr(vc, 1)) from ${test_table} order by id""")
+        check_columns_match("sub_binary_2args_2",
+                """select id, from_binary(sub_binary(vb, -1)), hex(substr(vc, -1)) from ${test_table} order by id""")
+        check_columns_match("sub_binary_nest_2args",
+                """select id, from_binary(sub_binary(sub_binary(vb, 1), 1)), hex(substr(substr(vc, 1), 1)) from ${test_table} order by id""")
+    }
+}