Skip to content

Commit

Permalink
Support char_position function (#2080)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?
- Support the `char_position` function
- Add test cases and examples


### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Test cases
  • Loading branch information
Ami11111 authored Oct 21, 2024
1 parent 5385ede commit 5e89468
Show file tree
Hide file tree
Showing 10 changed files with 329 additions and 38 deletions.
4 changes: 4 additions & 0 deletions example/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@
res = table_obj.output(["*"]).filter("trim(' abc ') = rtrim(ltrim(' abc '))").to_df()
print(res)

# function char_position: 1-based position of the second argument inside the first, 0 when absent
position_query = table_obj.output(["*", "char_position(c1, 'bc')"])
position_query = position_query.filter("char_position(c1, c1) <> 0")
res = position_query.to_df()
print(res)

res = db_obj.drop_table("function_example")

infinity_obj.disconnect()
16 changes: 16 additions & 0 deletions example/http/functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,22 @@ curl --request GET \
"filter": "trim(body) = '\''this is an example for trim'\''"
} '

# show rows of 'tbl1' where char_position(body, '123') = 1
# The JSON payload below is wrapped in single quotes, so a literal single quote
# inside it must be written as '\'' to reach the server. A bare '123' would be
# unquoted by the shell and the filter would be sent as char_position(body, 123).
echo -e '\n\n-- show rows of 'tbl1' where char_position(body, '123') = 1'
curl --request GET \
--url http://localhost:23820/databases/default_db/tables/tbl1/docs \
--header 'accept: application/json' \
--header 'content-type: application/json' \
--data '
{
"output":
[
"body"
],
"filter": "char_position(body, '\''123'\'') = 1"
} '


# drop tbl1
echo -e '\n\n-- drop tbl1'
curl --request DELETE \
Expand Down
95 changes: 93 additions & 2 deletions python/test_pysdk/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,8 +816,6 @@ def test_select_varchar_length(self, suffix):
[{"c1": 'a', "c2": 'a'}, {"c1": 'b', "c2": 'b'}, {"c1": 'c', "c2": 'c'}, {"c1": 'd', "c2": 'd'},
{"c1": 'abc', "c2": 'abc'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'}, {"c1": 'dbc', "c2": 'dbc'}])



res = table_obj.output(["*"]).filter("char_length(c1) = 1").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('a', 'b', 'c', 'd'),
Expand Down Expand Up @@ -852,4 +850,97 @@ def test_select_regex(self, suffix):


res = db_obj.drop_table("test_select_regex"+suffix)
assert res.error_code == ErrorCode.OK

def test_select_upper_lower(self, suffix):
    """Filter on upper(c1) = c2 and expect only the rows whose c2 is the upper-cased c1."""
    table_name = "test_select_upper_lower"+suffix
    schema = {"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
              "c2": {"type": "varchar", "constraints": ["not null"]}}
    # (c1, c2) pairs; only the first five have c2 == upper(c1).
    pairs = [('a', 'A'), ('b', 'B'), ('c', 'C'), ('d', 'D'),
             ('abc', 'ABC'), ('bbc', 'bbc'), ('cbc', 'cbc'), ('dbc', 'dbc')]

    db_obj = self.infinity_obj.get_database("default_db")
    db_obj.drop_table(table_name, ConflictType.Ignore)
    db_obj.create_table(table_name, schema, ConflictType.Error)
    table_obj = db_obj.get_table(table_name)
    table_obj.insert([{"c1": first, "c2": second} for first, second in pairs])

    res = table_obj.output(["*"]).filter("upper(c1) = c2").to_df()
    print(res)
    expected = pd.DataFrame({'c1': ('a', 'b', 'c', 'd', 'abc'),
                             'c2': ('A', 'B', 'C', 'D', 'ABC')})
    expected = expected.astype({'c1': dtype('O'), 'c2': dtype('O')})
    pd.testing.assert_frame_equal(res, expected)

    res = db_obj.drop_table(table_name)
    assert res.error_code == ErrorCode.OK

def test_select_substring(self, suffix):
    """Filter on substring(c1, 0, 3) = c2 and expect only the rows where that equality holds."""
    table_name = "test_select_substring"+suffix
    schema = {"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
              "c2": {"type": "varchar", "constraints": ["not null"]}}
    # (c1, c2) pairs; the last three are the rows the test expects back.
    pairs = [('a', 'A'), ('b', 'B'), ('c', 'C'), ('d', 'D'),
             ('abc', 'ABC'), ('bbcc', 'bbc'), ('cbcc', 'cbc'), ('dbcc', 'dbc')]

    db_obj = self.infinity_obj.get_database("default_db")
    db_obj.drop_table(table_name, ConflictType.Ignore)
    db_obj.create_table(table_name, schema, ConflictType.Error)
    table_obj = db_obj.get_table(table_name)
    table_obj.insert([{"c1": first, "c2": second} for first, second in pairs])

    res = table_obj.output(["*"]).filter("substring(c1, 0, 3) = c2").to_df()
    print(res)
    expected = pd.DataFrame({'c1': ('bbcc', 'cbcc', 'dbcc'),
                             'c2': ('bbc', 'cbc', 'dbc')})
    expected = expected.astype({'c1': dtype('O'), 'c2': dtype('O')})
    pd.testing.assert_frame_equal(res, expected)

    res = db_obj.drop_table(table_name)
    assert res.error_code == ErrorCode.OK

def test_select_trim(self, suffix):
    """Run ltrim, rtrim and trim filters against c2 and compare each result with its expected rows."""
    table_name = "test_select_trim"+suffix
    schema = {"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
              "c2": {"type": "varchar", "constraints": ["not null"]}}
    pairs = [(' a', 'a'), (' b', 'b'), (' c', 'c'),
             ('ab ', 'ab'), ('bcc ', 'bcc'), ('cbc ', 'cbc'), (' dbc ', 'dbc')]

    db_obj = self.infinity_obj.get_database("default_db")
    db_obj.drop_table(table_name, ConflictType.Ignore)
    db_obj.create_table(table_name, schema, ConflictType.Error)
    table_obj = db_obj.get_table(table_name)
    table_obj.insert([{"c1": first, "c2": second} for first, second in pairs])

    # (filter expression, expected c1 column, expected c2 column), checked in this order.
    cases = [
        ("ltrim(c1) = c2", (' a', ' b', ' c'), ('a', 'b', 'c')),
        ("rtrim(c1) = c2", ('ab ', 'bcc ', 'cbc '), ('ab', 'bcc', 'cbc')),
        ("trim(c1) = c2", (' a', ' b', ' c', 'ab ', 'bcc ', 'cbc ', ' dbc '),
         ('a', 'b', 'c', 'ab', 'bcc', 'cbc', 'dbc')),
    ]
    for condition, expected_c1, expected_c2 in cases:
        res = table_obj.output(["*"]).filter(condition).to_df()
        print(res)
        expected = pd.DataFrame({'c1': expected_c1, 'c2': expected_c2})
        expected = expected.astype({'c1': dtype('O'), 'c2': dtype('O')})
        pd.testing.assert_frame_equal(res, expected)

    res = db_obj.drop_table(table_name)
    assert res.error_code == ErrorCode.OK

def test_select_position(self, suffix):
    """Filter on char_position(c1, c2) <> 0 and expect only the rows whose c1 contains c2."""
    table_name = "test_select_position"+suffix
    schema = {"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
              "c2": {"type": "varchar", "constraints": ["not null"]}}
    # (c1, c2) pairs; the last three are the rows the test expects back.
    pairs = [('a', 'A'), ('b', 'B'), ('c', 'C'), ('d', 'D'),
             ('abc', 'ABC'), ('bbcc', 'bbc'), ('cbcc', 'cbc'), ('dbcc', 'dbc')]

    db_obj = self.infinity_obj.get_database("default_db")
    db_obj.drop_table(table_name, ConflictType.Ignore)
    db_obj.create_table(table_name, schema, ConflictType.Error)
    table_obj = db_obj.get_table(table_name)
    table_obj.insert([{"c1": first, "c2": second} for first, second in pairs])

    res = table_obj.output(["*"]).filter("char_position(c1, c2) <> 0").to_df()
    print(res)
    expected = pd.DataFrame({'c1': ('bbcc', 'cbcc', 'dbcc'),
                             'c2': ('bbc', 'cbc', 'dbc')})
    expected = expected.astype({'c1': dtype('O'), 'c2': dtype('O')})
    pd.testing.assert_frame_equal(res, expected)

    res = db_obj.drop_table(table_name)
    assert res.error_code == ErrorCode.OK
2 changes: 2 additions & 0 deletions src/function/builtin_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ import regex;
import ltrim;
import rtrim;
import trim;
import position;
import default_values;
import special_function;
import internal_types;
Expand Down Expand Up @@ -129,6 +130,7 @@ void BuiltinFunctions::RegisterScalarFunction() {
RegisterLtrimFunction(catalog_ptr_);
RegisterRtrimFunction(catalog_ptr_);
RegisterTrimFunction(catalog_ptr_);
RegisterPositionFunction(catalog_ptr_);
}

void BuiltinFunctions::RegisterTableFunction() {}
Expand Down
59 changes: 59 additions & 0 deletions src/function/scalar/position.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
module;

module position;

import stl;
import catalog;
import status;
import infinity_exception;
import scalar_function;
import scalar_function_set;
import column_vector;

import third_party;
import logical_type;
import internal_types;
import data_type;
import logger;

namespace infinity {

struct PositionFunction {
template <typename TA, typename TB, typename TC>
static inline void Run(TA &first, TB &second, TC &result, ColumnVector *first_ptr, ColumnVector *second_ptr, ColumnVector *) {
Status status = Status::NotSupport("Not implemented");
RecoverableError(status);
}
};

// Varchar specialization: writes to `result` the 1-based offset of the first occurrence of
// `second` inside `first`, or 0 when `second` does not occur in `first`.
// The third ColumnVector argument is unused.
// NOTE(review): the offset comes from String::find over the raw chars, so for multi-byte
// encodings it is presumably a byte offset rather than a character index. Confirm intent.
template <>
inline void PositionFunction::Run(VarcharT &first, VarcharT &second, IntegerT &result, ColumnVector *first_ptr, ColumnVector *second_ptr, ColumnVector *) {
    // Fetch the character data of each varchar through its column vector.
    const Span<const char> haystack_chars = first_ptr->GetVarcharInner(first);
    const Span<const char> needle_chars = second_ptr->GetVarcharInner(second);
    const String haystack(haystack_chars.data(), haystack_chars.size());
    const String needle(needle_chars.data(), needle_chars.size());

    const String::size_type found_at = haystack.find(needle);
    // npos (no match) maps to 0; a match at offset k maps to k + 1.
    result = (found_at == String::npos) ? 0 : found_at + 1;
}



void RegisterPositionFunction(const UniquePtr<Catalog> &catalog_ptr) {
String func_name = "char_position";

SharedPtr<ScalarFunctionSet> function_set_ptr = MakeShared<ScalarFunctionSet>(func_name);

ScalarFunction varchar_pos_int32(func_name,
{DataType(LogicalType::kVarchar), DataType(LogicalType::kVarchar)},
{DataType(LogicalType::kInteger)},
&ScalarFunction::BinaryFunctionVarlenToVarlen<VarcharT, VarcharT, IntegerT, PositionFunction>);
function_set_ptr->AddFunction(varchar_pos_int32);

Catalog::AddFunctionSet(catalog_ptr.get(), function_set_ptr);
}

} // namespace infinity
13 changes: 13 additions & 0 deletions src/function/scalar/position.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module;

export module position;

import stl;

namespace infinity {

// Forward declaration; only a reference to UniquePtr<Catalog> is needed in this interface.
class Catalog;

// Registers the "char_position" scalar function set in the given catalog.
export void RegisterPositionFunction(const UniquePtr<Catalog> &catalog_ptr);

}
51 changes: 47 additions & 4 deletions src/function/scalar_function.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct UnaryOpDirectWrapper {
template <typename Operator>
struct BinaryOpDirectWrapper {
template <typename LeftValueType, typename RightValueType, typename TargetValueType>
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *, SizeT, void *) {
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *, SizeT, void *, void *, void *) {
return Operator::template Run<LeftValueType, RightValueType, TargetValueType>(left, right, result);
}
};
Expand Down Expand Up @@ -81,7 +81,7 @@ struct UnaryTryOpWrapper {
template <typename Operator>
struct BinaryTryOpWrapper {
template <typename LeftValueType, typename RightValueType, typename TargetValueType>
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *nulls_ptr, SizeT idx, void *) {
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *nulls_ptr, SizeT idx, void *, void *, void *) {
if (Operator::template Run<LeftValueType, RightValueType, TargetValueType>(left, right, result)) {
return;
}
Expand Down Expand Up @@ -116,7 +116,7 @@ struct UnaryOpDirectToVarlenWrapper {
template <typename Operator>
struct BinaryOpDirectToVarlenWrapper {
template <typename LeftValueType, typename RightValueType, typename TargetValueType>
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *, SizeT, void *state_ptr) {
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *, SizeT, void *, void *, void *state_ptr) {
auto *function_data_ptr = (ScalarFunctionData *)(state_ptr);
return Operator::template Run<LeftValueType, RightValueType, TargetValueType>(left, right, result, function_data_ptr->column_vector_ptr_);
}
Expand Down Expand Up @@ -152,7 +152,7 @@ struct UnaryTryOpToVarlenWrapper {
template <typename Operator>
struct BinaryTryOpToVarlenWrapper {
template <typename LeftValueType, typename RightValueType, typename TargetValueType>
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *nulls_ptr, SizeT idx, void *state_ptr) {
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *nulls_ptr, SizeT idx, void *, void *, void *state_ptr) {
auto *function_data_ptr = (ScalarFunctionData *)(state_ptr);
if (Operator::template Run<LeftValueType, RightValueType, TargetValueType>(left, right, result, function_data_ptr->column_vector_ptr_)) {
return;
Expand Down Expand Up @@ -192,6 +192,17 @@ struct UnaryOpDirectVarlenToVarlenWrapper {
}
};

template <typename Operator>
struct BinaryOpDirectVarlenToVarlenWrapper {
template <typename LeftValueType, typename RightValueType, typename TargetValueType>
inline static void Execute(LeftValueType left, RightValueType right, TargetValueType &result, Bitmask *, SizeT, void *state_ptr_left, void *state_ptr_right, void *state_ptr) {
auto *function_data_ptr_left = (ScalarFunctionData *)(state_ptr_left);
auto *function_data_ptr_right = (ScalarFunctionData *)(state_ptr_right);
auto *function_data_ptr = (ScalarFunctionData *)(state_ptr);
return Operator::template Run<LeftValueType, RightValueType, TargetValueType>(left, right, result, function_data_ptr_left->column_vector_ptr_, function_data_ptr_right->column_vector_ptr_, function_data_ptr->column_vector_ptr_);
}
};

template <typename Operator>
struct TernaryTryOpVarlenToVarlenWrapper {
template <typename FirstType, typename SecondType, typename ThirdType, typename ResultType>
Expand Down Expand Up @@ -351,6 +362,8 @@ public:
output,
input.row_count(),
nullptr,
nullptr,
nullptr,
true);
}

Expand All @@ -370,6 +383,8 @@ public:
output,
input.row_count(),
nullptr,
nullptr,
nullptr,
true);
}

Expand All @@ -389,6 +404,8 @@ public:
input.column_vectors[1],
output,
input.row_count(),
nullptr,
nullptr,
&function_data,
true);
}
Expand All @@ -409,10 +426,36 @@ public:
input.column_vectors[1],
output,
input.row_count(),
nullptr,
nullptr,
&function_data,
true);
}

// Binary function, without any failure, whose operator receives the column vectors of both
// inputs and of the output in addition to the values themselves.
// Raises UnrecoverableError when `input` does not have exactly two columns or is not finalized.
template <typename LeftType, typename RightType, typename OutputType, typename Operation>
static inline void BinaryFunctionVarlenToVarlen(const DataBlock &input, SharedPtr<ColumnVector> &output) {
    if (input.column_count() != 2) {
        String error_message = "Binary function: input column count isn't two.";
        UnrecoverableError(error_message);
    }
    if (!input.Finalized()) {
        // This branch is taken when the block has NOT been finalized; the message must say so.
        String error_message = "Input data block isn't finalized";
        UnrecoverableError(error_message);
    }
    // One ScalarFunctionData per column vector, passed as the left / right / result state pointers.
    ScalarFunctionData function_data_left(input.column_vectors[0].get());
    ScalarFunctionData function_data_right(input.column_vectors[1].get());
    ScalarFunctionData function_data(output.get());
    BinaryOperator::Execute<LeftType, RightType, OutputType, BinaryOpDirectVarlenToVarlenWrapper<Operation>>(input.column_vectors[0],
                                                                                                             input.column_vectors[1],
                                                                                                             output,
                                                                                                             input.row_count(),
                                                                                                             &function_data_left,
                                                                                                             &function_data_right,
                                                                                                             &function_data,
                                                                                                             true);
}

// Ternary function without any failure.
template <typename FirstType, typename SecondType, typename ThirdType, typename ResultType, typename Operation>
static inline void TernaryFunction(const DataBlock &input, SharedPtr<ColumnVector> &output) {
Expand Down
5 changes: 3 additions & 2 deletions src/storage/column_vector/column_vector.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -1234,10 +1234,11 @@ concept PODValueType = IsAnyOf<ValueType,
TimestampT,
IntervalT,
RowID,
UuidT>;
UuidT,
VarcharT>;

export template <typename ValueType>
concept BinaryGenerateBoolean = PODValueType<ValueType> or IsAnyOf<ValueType, BooleanT, VarcharT>;
concept BinaryGenerateBoolean = PODValueType<ValueType> or IsAnyOf<ValueType, BooleanT>;

template <typename Unsupported>
class ColumnVectorPtrAndIdx {
Expand Down
Loading

0 comments on commit 5e89468

Please sign in to comment.