diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 369db7a4cb8aea..ed6da5606593bb 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include "gen_cpp/olap_file.pb.h" #include "olap/olap_common.h" @@ -50,7 +51,7 @@ OLAPStatus DeleteConditionHandler::generate_delete_predicate( const TabletSchema& schema, const std::vector& conditions, DeletePredicatePB* del_pred) { - if (conditions.size() == 0) { + if (conditions.empty()) { LOG(WARNING) << "invalid parameters for store_cond." << " condition_size=" << conditions.size(); return OLAP_ERR_DELETE_INVALID_PARAMETERS; @@ -66,9 +67,22 @@ OLAPStatus DeleteConditionHandler::generate_delete_predicate( // 存储删除条件 for (const TCondition& condition : conditions) { - string condition_str = construct_sub_predicates(condition); - del_pred->add_sub_predicates(condition_str); - LOG(INFO) << "store one sub-delete condition. condition=" << condition_str; + if (condition.condition_values.size() > 1) { + InPredicatePB* in_pred = del_pred->add_in_predicates(); + in_pred->set_column_name(condition.column_name); + bool is_not_in = condition.condition_op == "!*="; + in_pred->set_is_not_in(is_not_in); + for (const auto& condition_value : condition.condition_values) { + in_pred->add_values(condition_value); + } + + LOG(INFO) << "store one sub-delete condition. condition name=" << in_pred->column_name() + << "condition size=" << in_pred->values().size(); + } else { + string condition_str = construct_sub_predicates(condition); + del_pred->add_sub_predicates(condition_str); + LOG(INFO) << "store one sub-delete condition. condition=" << condition_str; + } } del_pred->set_version(-1); @@ -86,47 +100,20 @@ string DeleteConditionHandler::construct_sub_predicates(const TCondition& condit if ("IS" == op) { condition_str = condition.column_name + " " + op + " " + condition.condition_values[0]; } else { + if (op == "*=") { + op = "="; + } else if (op == "!*=") { + op = "!="; + } condition_str = condition.column_name + op + condition.condition_values[0]; } return condition_str; } -OLAPStatus DeleteConditionHandler::check_condition_valid( - const TabletSchema& schema, - const TCondition& cond) { - // 检查指定列名的列是否存在 - int field_index = _get_field_index(schema, cond.column_name); - - if (field_index < 0) { - OLAP_LOG_WARNING("field is not existent. [field_index=%d]", field_index); - return OLAP_ERR_DELETE_INVALID_CONDITION; - } - - // 检查指定的列是不是key,是不是float或doulbe类型 - const TabletColumn& column = schema.column(field_index); - - if ((!column.is_key() && schema.keys_type() != KeysType::DUP_KEYS) - || column.type() == OLAP_FIELD_TYPE_DOUBLE - || column.type() == OLAP_FIELD_TYPE_FLOAT) { - LOG(WARNING) << "field is not key column, or storage model is not duplicate, or data type is float or double."; - return OLAP_ERR_DELETE_INVALID_CONDITION; - } - - // 检查删除条件中指定的过滤值是否符合每个类型自身的要求 - // 1. 对于整数类型(int8,int16,in32,int64,uint8,uint16,uint32,uint64),检查是否溢出 - // 2. 对于decimal类型,检查是否超过建表时指定的精度和标度 - // 3. 对于date和datetime类型,检查指定的过滤值是否符合日期格式以及是否指定错误的值 - // 4. 对于string和varchar类型,检查指定的过滤值是否超过建表时指定的长度 - if (cond.condition_values.size() != 1) { - OLAP_LOG_WARNING("invalid condition value size. [size=%ld]", cond.condition_values.size()); - return OLAP_ERR_DELETE_INVALID_CONDITION; - } - const string& value_str = cond.condition_values[0]; - - FieldType field_type = column.type(); +bool DeleteConditionHandler::is_condition_value_valid(const TabletColumn& column, const TCondition& cond, const string& value_str) { bool valid_condition = false; - - if ("IS" == cond.condition_op + FieldType field_type = column.type(); + if ("IS" == cond.condition_op && ("NULL" == value_str || "NOT NULL" == value_str)) { valid_condition = true; } else if (field_type == OLAP_FIELD_TYPE_TINYINT) { @@ -160,54 +147,49 @@ OLAPStatus DeleteConditionHandler::check_condition_valid( } else { OLAP_LOG_WARNING("unknown field type. [type=%d]", field_type); } - - if (valid_condition) { - return OLAP_SUCCESS; - } else { - LOG(WARNING) << "invalid condition value. [value=" << value_str << "]"; - return OLAP_ERR_DELETE_INVALID_CONDITION; - } + return valid_condition; } -OLAPStatus DeleteConditionHandler::_check_version_valid(std::vector* all_file_versions, - const int32_t filter_version) { - // 找到当前最大的delta文件版本号 - int max_delta_version = -1; - vector::const_iterator version_iter = all_file_versions->begin(); +OLAPStatus DeleteConditionHandler::check_condition_valid( + const TabletSchema& schema, + const TCondition& cond) { + // 检查指定列名的列是否存在 + int field_index = _get_field_index(schema, cond.column_name); - for (; version_iter != all_file_versions->end(); ++version_iter) { - if (version_iter->second > max_delta_version) { - max_delta_version = version_iter->second; - } + if (field_index < 0) { + OLAP_LOG_WARNING("field is not existent. [field_index=%d]", field_index); + return OLAP_ERR_DELETE_INVALID_CONDITION; } - if (filter_version == max_delta_version || filter_version == max_delta_version + 1) { - return OLAP_SUCCESS; - } else { - OLAP_LOG_WARNING("invalid delete condition version. [version=%d, max_delta_version=%d]", - filter_version, max_delta_version); - return OLAP_ERR_DELETE_INVALID_VERSION; - } -} + // 检查指定的列是不是key,是不是float或doulbe类型 + const TabletColumn& column = schema.column(field_index); -int DeleteConditionHandler::_check_whether_condition_exist(const DelPredicateArray& delete_conditions, int cond_version) { - if (delete_conditions.size() == 0) { - return -1; + if ((!column.is_key() && schema.keys_type() != KeysType::DUP_KEYS) + || column.type() == OLAP_FIELD_TYPE_DOUBLE + || column.type() == OLAP_FIELD_TYPE_FLOAT) { + LOG(WARNING) << "field is not key column, or storage model is not duplicate, or data type is float or double."; + return OLAP_ERR_DELETE_INVALID_CONDITION; } - int index = 0; - - while (index != delete_conditions.size()) { - DeletePredicatePB temp = delete_conditions.Get(index); + // 检查删除条件中指定的过滤值是否符合每个类型自身的要求 + // 1. 对于整数类型(int8,int16,in32,int64,uint8,uint16,uint32,uint64),检查是否溢出 + // 2. 对于decimal类型,检查是否超过建表时指定的精度和标度 + // 3. 对于date和datetime类型,检查指定的过滤值是否符合日期格式以及是否指定错误的值 + // 4. 对于string和varchar类型,检查指定的过滤值是否超过建表时指定的长度 + if ("*=" != cond.condition_op && "!*=" != cond.condition_op && cond.condition_values.size() != 1) { + OLAP_LOG_WARNING("invalid condition value size. [size=%ld]", cond.condition_values.size()); + return OLAP_ERR_DELETE_INVALID_CONDITION; + } - if (temp.version() == cond_version) { - return index; + for (int i = 0; i < cond.condition_values.size(); i++) { + const string& value_str = cond.condition_values[i]; + if (!is_condition_value_valid(column, cond, value_str)) { + LOG(WARNING) << "invalid condition value. [value=" << value_str << "]"; + return OLAP_ERR_DELETE_INVALID_CONDITION; } - - ++index; } - return -1; + return OLAP_SUCCESS; } bool DeleteHandler::_parse_condition(const std::string& condition_str, TCondition* condition) { @@ -217,7 +199,7 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio try { // Condition string format const char* const CONDITION_STR_PATTERN = - "(\\w+)\\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\\*=)|(?:IS))\\s*((?:[\\S ]+)?)"; + R"((\w+)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))\s*((?:[\S ]+)?))"; regex ex(CONDITION_STR_PATTERN); if (regex_match(condition_str, what, ex)) { if (condition_str.size() != what[0].str().size()) { @@ -234,7 +216,6 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio if (!matched) { return false; } - condition->column_name = what[1].str(); condition->condition_op = what[2].str(); condition->condition_values.push_back(what[3].str()); @@ -268,13 +249,12 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema, temp.del_cond = new(std::nothrow) Conditions(); - if (temp.del_cond == NULL) { + if (temp.del_cond == nullptr) { LOG(FATAL) << "fail to malloc Conditions. size=" << sizeof(Conditions); return OLAP_ERR_MALLOC_ERROR; } temp.del_cond->set_tablet_schema(&schema); - for (int i = 0; i != it->sub_predicates_size(); ++i) { TCondition condition; if (!_parse_condition(it->sub_predicates(i), &condition)) { @@ -290,6 +270,25 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema, } } + for (int i = 0; i != it->in_predicates_size(); ++i) { + TCondition condition; + const InPredicatePB& in_predicate = it->in_predicates(i); + condition.__set_column_name(in_predicate.column_name()); + if (in_predicate.is_not_in()) { + condition.__set_condition_op("!*="); + } else { + condition.__set_condition_op("*="); + } + for (const auto& value : in_predicate.values()) { + condition.condition_values.push_back(value); + } + OLAPStatus res = temp.del_cond->append_condition(condition); + if (OLAP_SUCCESS != res) { + OLAP_LOG_WARNING("fail to append condition.[res=%d]", res); + return res; + } + } + _del_conds.push_back(temp); } diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h index 3ba88ab5a09ec4..7cab577b494876 100644 --- a/be/src/olap/delete_handler.h +++ b/be/src/olap/delete_handler.h @@ -52,13 +52,6 @@ class DeleteConditionHandler { private: - // 检查指定的删除条件版本是否符合要求; - // 如果不符合要求,返回OLAP_ERR_DELETE_INVALID_VERSION;符合要求返回OLAP_SUCCESS - OLAPStatus _check_version_valid(std::vector* all_file_versions, const int32_t filter_version); - - // 检查指定版本的删除条件是否已经存在。如果存在,返回指定版本删除条件的数组下标;不存在返回-1 - int _check_whether_condition_exist(const DelPredicateArray& delete_conditions, int cond_version); - int32_t _get_field_index(const TabletSchema& schema, const std::string& field_name) const { for (int i = 0; i < schema.num_columns(); i++) { if (schema.column(i).name() == field_name) { @@ -68,6 +61,8 @@ class DeleteConditionHandler { LOG(WARNING) << "invalid field name. name='" << field_name; return -1; } + + bool is_condition_value_valid(const TabletColumn& column, const TCondition& cond, const string& value); }; // 表示一个删除条件 @@ -100,10 +95,6 @@ class DeleteHandler { DeleteHandler() : _is_inited(false) {} ~DeleteHandler() {} - bool get_init_status() const { - return _is_inited; - } - // 初始化handler,将从Header文件中取出小于等于指定版本号的删除条件填充到_del_conds中 // 调用前需要先对Header文件加读锁 // diff --git a/be/src/olap/olap_cond.cpp b/be/src/olap/olap_cond.cpp index 56722851ad0984..1238b741e1d57f 100644 --- a/be/src/olap/olap_cond.cpp +++ b/be/src/olap/olap_cond.cpp @@ -56,65 +56,37 @@ using doris::ColumnStatistics; namespace doris { +#define MAX_OP_STR_LENGTH 3 + static CondOp parse_op_type(const string& op) { - if (op.size() > 2) { + if (op.size() > MAX_OP_STR_LENGTH) { return OP_NULL; } - CondOp op_type = OP_NULL; - if (op.compare("=") == 0) { + if (op == "=") { return OP_EQ; - } - - if (0 == strcasecmp(op.c_str(), "is")) { + } else if (0 == strcasecmp(op.c_str(), "is")) { return OP_IS; - } - - // Maybe we can just use string compare. - // Like: - // if (op == "!=") { - // op_type = OP_NE; - // } else if (op == "*") { - // op_type = OP_IN; - // } else if (op == ">=) { - // ... - - switch (op.c_str()[0]) { - case '!': - op_type = OP_NE; - break; - case '*': - op_type = OP_IN; - break; - case '>': - switch (op.c_str()[1]) { - case '=': - op_type = OP_GE; - break; - default: - op_type = OP_GT; - break; - } - break; - case '<': - switch (op.c_str()[1]) { - case '=': - op_type = OP_LE; - break; - default: - op_type = OP_LT; - break; - } - break; - default: - op_type = OP_NULL; - break; - } - - return op_type; + } else if (op == "!=") { + return OP_NE; + } else if (op == "*=") { + return OP_IN; + } else if (op == "!*=") { + return OP_NOT_IN; + } else if (op == ">=") { + return OP_GE; + } else if (op == ">>" || op == ">") { + return OP_GT; + } else if (op == "<=") { + return OP_LE; + } else if (op == "<<" || op == "<") { + return OP_LT; + } + + return OP_NULL; } -Cond::Cond() : op(OP_NULL), operand_field(nullptr) { +Cond::Cond() : op(OP_NULL), operand_field(nullptr), min_value_field(nullptr), max_value_field(nullptr) { } Cond::~Cond() { @@ -122,12 +94,14 @@ Cond::~Cond() { for (auto& it : operand_set) { delete it; } + min_value_field = nullptr; + max_value_field = nullptr; } OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { // Parse op type op = parse_op_type(tcond.condition_op); - if (op == OP_NULL || (op != OP_IN && tcond.condition_values.size() != 1)) { + if (op == OP_NULL || (op != OP_IN && op != OP_NOT_IN && tcond.condition_values.size() != 1)) { OLAP_LOG_WARNING("Condition op type is invalid. [name=%s, op=%d, size=%d]", tcond.column_name.c_str(), op, tcond.condition_values.size()); return OLAP_ERR_INPUT_PARAMETER_ERROR; @@ -147,7 +121,7 @@ OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { f->set_not_null(); } operand_field = f.release(); - } else if (op != OP_IN) { + } else if (op != OP_IN && op != OP_NOT_IN) { auto operand = tcond.condition_values.begin(); std::unique_ptr f(WrapperField::create(column, operand->length())); if (f == nullptr) { @@ -176,6 +150,14 @@ OLAPStatus Cond::init(const TCondition& tcond, const TabletColumn& column) { tcond.column_name.c_str(), operand.c_str(), op); return res; } + if (min_value_field == nullptr || f->cmp(min_value_field) < 0) { + min_value_field = f.get(); + } + + if (max_value_field == nullptr || f->cmp(max_value_field) > 0) { + max_value_field = f.get(); + } + auto insert_reslut = operand_set.insert(f.get()); if (!insert_reslut.second) { LOG(WARNING) << "Duplicate operand in in-predicate.[condition=" << operand << "]"; @@ -210,19 +192,19 @@ bool Cond::eval(const RowCursorCell& cell) const { case OP_GE: return operand_field->field()->compare_cell(*operand_field, cell) <= 0; case OP_IN: { - for (const WrapperField* field : operand_set) { - if (field->field()->compare_cell(*field, cell) == 0) { - return true; - } - } - return false; + WrapperField wrapperField(const_cast (min_value_field->field()), cell); + auto ret = operand_set.find(&wrapperField) != operand_set.end(); + wrapperField.release_field(); + return ret; + } + case OP_NOT_IN: { + WrapperField wrapperField(const_cast (min_value_field->field()), cell); + auto ret = operand_set.find(&wrapperField) == operand_set.end(); + wrapperField.release_field(); + return ret; } case OP_IS: { - if (operand_field->is_null() == cell.is_null()) { - return true; - } else { - return false; - } + return operand_field->is_null() == cell.is_null(); } default: // Unknown operation type, just return false @@ -262,28 +244,16 @@ bool Cond::eval(const std::pair& statistic) const return operand_field->cmp(statistic.second) <= 0; } case OP_IN: { - FieldSet::const_iterator it = operand_set.begin(); - for (; it != operand_set.end(); ++it) { - if ((*it)->cmp(statistic.first) >= 0 - && (*it)->cmp(statistic.second) <= 0) { - return true; - } - } - break; + return min_value_field->cmp(statistic.second) <= 0 && max_value_field->cmp(statistic.first) >= 0; + } + case OP_NOT_IN: { + return min_value_field->cmp(statistic.second) > 0 || max_value_field->cmp(statistic.first) < 0; } case OP_IS: { if (operand_field->is_null()) { - if (statistic.first->is_null()) { - return true; - } else { - return false; - } + return statistic.first->is_null(); } else { - if (!statistic.second->is_null()) { - return true; - } else { - return false; - } + return !statistic.second->is_null(); } } default: @@ -378,20 +348,30 @@ int Cond::del_eval(const std::pair& stat) const { return ret; } case OP_IN: { - FieldSet::const_iterator it = operand_set.begin(); - for (; it != operand_set.end(); ++it) { - if ((*it)->cmp(stat.first) >= 0 - && (*it)->cmp(stat.second) <= 0) { - if (stat.first->cmp(stat.second) == 0) { - ret = DEL_SATISFIED; - } else { - ret = DEL_PARTIAL_SATISFIED; - } - break; + if (stat.first->cmp(stat.second) == 0) { + if (operand_set.find(stat.first) != operand_set.end()) { + ret = DEL_SATISFIED; + } else { + ret = DEL_NOT_SATISFIED; + } + } else { + if (min_value_field->cmp(stat.second) <= 0 && max_value_field->cmp(stat.first) >= 0) { + ret = DEL_PARTIAL_SATISFIED; } } - if (it == operand_set.end()) { - ret = DEL_SATISFIED; + return ret; + } + case OP_NOT_IN: { + if (stat.first->cmp(stat.second) == 0) { + if (operand_set.find(stat.first) == operand_set.end()) { + ret = DEL_SATISFIED; + } else { + ret = DEL_NOT_SATISFIED; + } + } else { + if (min_value_field->cmp(stat.second) > 0 || max_value_field->cmp(stat.first) < 0) { + ret = DEL_PARTIAL_SATISFIED; + } } return ret; } @@ -491,12 +471,7 @@ bool Cond::eval(const segment_v2::BloomFilter* bf) const { } case OP_IS: { // IS [NOT] NULL can only used in to filter IS NULL predicate. - if (operand_field->is_null()) { - return bf->test_bytes(nullptr, 0); - } else { - // is not null - return !bf->test_bytes(nullptr, 0); - } + return operand_field->is_null() == bf->test_bytes(nullptr, 0); } default: break; @@ -705,7 +680,7 @@ int Conditions::delete_pruning_filter(const std::vector& zone_maps) co // if the size of condcolumn vector is zero, // the delete condtion is not satisfied. ret = DEL_NOT_SATISFIED; - } else if (true == del_partial_satisfied) { + } else if (del_partial_satisfied) { ret = DEL_PARTIAL_SATISFIED; } else { ret = DEL_SATISFIED; diff --git a/be/src/olap/olap_cond.h b/be/src/olap/olap_cond.h index 7166101ea83708..c456be30c64ba1 100644 --- a/be/src/olap/olap_cond.h +++ b/be/src/olap/olap_cond.h @@ -38,15 +38,16 @@ class WrapperField; class RowCursorCell; enum CondOp { + OP_NULL = -1, // invalid op OP_EQ = 0, // equal OP_NE = 1, // not equal OP_LT = 2, // less than OP_LE = 3, // less or equal OP_GT = 4, // greater than OP_GE = 5, // greater or equal - OP_IN = 6, // IN + OP_IN = 6, // in OP_IS = 7, // is null or not null - OP_NULL = 8 // invalid OP + OP_NOT_IN = 8 // not in }; // Hash functor for IN set @@ -87,11 +88,14 @@ struct Cond { } CondOp op; - // valid when op is not OP_IN + // valid when op is not OP_IN and OP_NOT_IN WrapperField* operand_field; - // valid when op is OP_IN + // valid when op is OP_IN or OP_NOT_IN typedef std::unordered_set FieldSet; FieldSet operand_set; + // valid when op is OP_IN or OP_NOT_IN, represents the minimum or maximum value of in elements + WrapperField* min_value_field; + WrapperField* max_value_field; }; // 所有归属于同一列上的条件二元组,聚合在一个CondColumn上 diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 7f75bc8685931d..2db839b394d565 100755 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -599,6 +599,7 @@ void TabletMeta::add_delete_predicate(const DeletePredicatePB& delete_predicate, DeletePredicatePB* del_pred = _del_pred_array.Add(); del_pred->set_version(version); *del_pred->mutable_sub_predicates() = delete_predicate.sub_predicates(); + *del_pred->mutable_in_predicates() = delete_predicate.in_predicates(); } void TabletMeta::remove_delete_predicate_by_version(const Version& version) { diff --git a/be/src/olap/wrapper_field.cpp b/be/src/olap/wrapper_field.cpp index 34f3ca7e32bb3a..60e76cc8091e87 100644 --- a/be/src/olap/wrapper_field.cpp +++ b/be/src/olap/wrapper_field.cpp @@ -86,4 +86,8 @@ WrapperField::WrapperField(Field* rep, size_t variable_len, bool is_string_type) } } +WrapperField::WrapperField(Field* rep, const RowCursorCell &row_cursor_cell) + : _rep(rep), _field_buf((char *)row_cursor_cell.cell_ptr() - 1) { +} + } diff --git a/be/src/olap/wrapper_field.h b/be/src/olap/wrapper_field.h index 7f4aa5bc83428d..62c0233bde2419 100644 --- a/be/src/olap/wrapper_field.h +++ b/be/src/olap/wrapper_field.h @@ -36,6 +36,10 @@ class WrapperField { WrapperField(Field* rep, size_t variable_len, bool is_string_type); + // only used to wrapped content of row cursor cell to find element in wrapped field set + // do not delete rep, should call release_field before deconstructed + WrapperField(Field* rep, const RowCursorCell& row_cursor_cell); + virtual ~WrapperField() { delete _rep; delete [] _owned_buf; @@ -93,6 +97,11 @@ class WrapperField { void* mutable_cell_ptr() const { return _field_buf + 1; } const Field* field() const { return _rep; } + // should be only called by WrapperField which constructed by RowCursorCell + void release_field() { + _rep = nullptr; + } + int cmp(const WrapperField* field) const { return _rep->compare_cell(*this, *field); } diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp index 9aa8722facb8a2..12db222ea110dd 100644 --- a/be/test/olap/delete_handler_test.cpp +++ b/be/test/olap/delete_handler_test.cpp @@ -307,21 +307,54 @@ TEST_F(TestDeleteConditionHandler, StoreCondSucceed) { condition.condition_values.push_back("3"); conditions.push_back(condition); - condition.column_name = "k2"; + condition.column_name = "k3"; condition.condition_op = "<="; condition.condition_values.clear(); condition.condition_values.push_back("5"); conditions.push_back(condition); + condition.column_name = "k4"; + condition.condition_op = "IS"; + condition.condition_values.clear(); + condition.condition_values.push_back("NULL"); + conditions.push_back(condition); + + condition.column_name = "k5"; + condition.condition_op = "*="; + condition.condition_values.clear(); + condition.condition_values.push_back("7"); + conditions.push_back(condition); + + condition.column_name = "k12"; + condition.condition_op = "!*="; + condition.condition_values.clear(); + condition.condition_values.push_back("9"); + conditions.push_back(condition); + + condition.column_name = "k13"; + condition.condition_op = "*="; + condition.condition_values.clear(); + condition.condition_values.push_back("1"); + condition.condition_values.push_back("3"); + conditions.push_back(condition); + DeletePredicatePB del_pred; success_res = _delete_condition_handler.generate_delete_predicate(tablet->tablet_schema(), conditions, &del_pred); ASSERT_EQ(OLAP_SUCCESS, success_res); // 验证存储在header中的过滤条件正确 - ASSERT_EQ(size_t(3), del_pred.sub_predicates_size()); + ASSERT_EQ(size_t(6), del_pred.sub_predicates_size()); EXPECT_STREQ("k1=1", del_pred.sub_predicates(0).c_str()); EXPECT_STREQ("k2>>3", del_pred.sub_predicates(1).c_str()); - EXPECT_STREQ("k2<=5", del_pred.sub_predicates(2).c_str()); + EXPECT_STREQ("k3<=5", del_pred.sub_predicates(2).c_str()); + EXPECT_STREQ("k4 IS NULL", del_pred.sub_predicates(3).c_str()); + EXPECT_STREQ("k5=7", del_pred.sub_predicates(4).c_str()); + EXPECT_STREQ("k12!=9", del_pred.sub_predicates(5).c_str()); + + ASSERT_EQ(size_t(1), del_pred.in_predicates_size()); + ASSERT_FALSE(del_pred.in_predicates(0).is_not_in()); + EXPECT_STREQ("k13", del_pred.in_predicates(0).column_name().c_str()); + ASSERT_EQ(std::size_t(2), del_pred.in_predicates(0).values().size()); } // 检测参数不正确的情况,包括:空的过滤条件字符串 diff --git a/docs/en/administrator-guide/config/fe_config.md b/docs/en/administrator-guide/config/fe_config.md index b62e3f1fadc959..14118c47445b49 100644 --- a/docs/en/administrator-guide/config/fe_config.md +++ b/docs/en/administrator-guide/config/fe_config.md @@ -379,6 +379,10 @@ This variable is a dynamic configuration, and users can modify the configuration ### `max_agent_task_threads_num` +### `max_allowed_in_element_num_of_delete` + +This configuration is used to limit element num of InPredicate in delete statement. The default value is 1024. + ### `max_allowed_packet` ### `max_backend_down_time_second` diff --git a/docs/en/administrator-guide/variables.md b/docs/en/administrator-guide/variables.md index a41396a578827d..5914df91c5035f 100644 --- a/docs/en/administrator-guide/variables.md +++ b/docs/en/administrator-guide/variables.md @@ -234,7 +234,7 @@ SET forward_to_master = concat('tr', 'u', 'e'); * `lower_case_table_names` Used for compatibility with MySQL clients. Cannot be set. Table names in current Doris are case sensitive by default. - + * `max_allowed_packet` Used for compatible JDBC connection pool C3P0. No practical effect. diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md index c586d20f1a0f40..bc51c2df7bf943 100644 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md +++ b/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md @@ -30,12 +30,12 @@ under the License. This statement is used to conditionally delete data in the specified table (base index) partition. This action deletes the rollup index data associated with this base index at the same time. Grammar: - DELETE FROM table name [PARTITION partition name] + DELETE FROM table_name [PARTITION partition_name] WHERE - column_name1 op value[ AND column_name2 op value ...]; + column_name1 op { value | value_list } [ AND column_name2 op { value | value_list } ...]; Explain: - 1) Optional types of OP include: =,>,<,>=,<=,<=,<=,!= + 1) Optional types of OP include: =, >, <, >=, <=, <=, <=, !=, in, not in 2) Conditions on key columns can only be specified. 2) When the selected key column does not exist in a rollup, delete cannot be performed. 3) The relationship between conditions can only be "and". diff --git a/docs/zh-CN/administrator-guide/config/fe_config.md b/docs/zh-CN/administrator-guide/config/fe_config.md index e1ad951ce42a65..2a5d41647f88a6 100644 --- a/docs/zh-CN/administrator-guide/config/fe_config.md +++ b/docs/zh-CN/administrator-guide/config/fe_config.md @@ -377,6 +377,10 @@ FE 的配置项有两种方式进行配置: ### `max_agent_task_threads_num` +### `max_allowed_in_element_num_of_delete` + +该配置被用于限制delete语句中谓词in的元素数量。默认值为1024。 + ### `max_allowed_packet` ### `max_backend_down_time_second` diff --git a/docs/zh-CN/administrator-guide/variables.md b/docs/zh-CN/administrator-guide/variables.md index f9e6654410ae26..5e780f9b10bba7 100644 --- a/docs/zh-CN/administrator-guide/variables.md +++ b/docs/zh-CN/administrator-guide/variables.md @@ -233,7 +233,7 @@ SET forward_to_master = concat('tr', 'u', 'e'); * `lower_case_table_names` 用于兼容 MySQL 客户端。不可设置。当前 Doris 中的表名默认为大小写敏感。 - + * `max_allowed_packet` 用于兼容 JDBC 连接池 C3P0。 无实际作用。 diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/DELETE.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/DELETE.md index 8a3d800f6f365b..707813409e43d5 100644 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/DELETE.md +++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/DELETE.md @@ -31,11 +31,11 @@ under the License. 该操作会同时删除和此 base index 相关的 rollup index 的数据。 语法: DELETE FROM table_name [PARTITION partition_name] - WHERE - column_name1 op value[ AND column_name2 op value ...]; + WHERE + column_name1 op { value | value_list } [ AND column_name2 op { value | value_list } ...]; 说明: - 1) op 的可选类型包括:=, >, <, >=, <=, != + 1) op 的可选类型包括:=, >, <, >=, <=, !=, in, not in 2) 只能指定 key 列上的条件。 2) 当选定的 key 列不存在于某个 rollup 中时,无法进行 delete。 3) 条件之间只能是“与”的关系。 diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DeleteStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DeleteStmt.java index 2ff5a34730ec53..6d80c076605b9a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DeleteStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DeleteStmt.java @@ -20,6 +20,7 @@ import org.apache.doris.analysis.CompoundPredicate.Operator; import org.apache.doris.catalog.Catalog; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.UserException; @@ -100,11 +101,11 @@ private void analyzePredicate(Expr predicate) throws AnalysisException { BinaryPredicate binaryPredicate = (BinaryPredicate) predicate; Expr leftExpr = binaryPredicate.getChild(0); if (!(leftExpr instanceof SlotRef)) { - throw new AnalysisException("Left expr should be column name"); + throw new AnalysisException("Left expr of binary predicate should be column name"); } Expr rightExpr = binaryPredicate.getChild(1); if (!(rightExpr instanceof LiteralExpr)) { - throw new AnalysisException("Right expr should be value"); + throw new AnalysisException("Right expr of binary predicate should be value"); } deleteConditions.add(binaryPredicate); } else if (predicate instanceof CompoundPredicate) { @@ -119,11 +120,29 @@ private void analyzePredicate(Expr predicate) throws AnalysisException { IsNullPredicate isNullPredicate = (IsNullPredicate) predicate; Expr leftExpr = isNullPredicate.getChild(0); if (!(leftExpr instanceof SlotRef)) { - throw new AnalysisException("Left expr should be column name"); + throw new AnalysisException("Left expr of is_null predicate should be column name"); } deleteConditions.add(isNullPredicate); + } else if (predicate instanceof InPredicate) { + InPredicate inPredicate = (InPredicate) predicate; + Expr leftExpr = inPredicate.getChild(0); + if (!(leftExpr instanceof SlotRef)) { + throw new AnalysisException("Left expr of in predicate should be column name"); + } + int inElementNum = inPredicate.getInElementNum(); + int maxAllowedInElementNumOfDelete = Config.max_allowed_in_element_num_of_delete; + if (inElementNum > maxAllowedInElementNumOfDelete) { + throw new AnalysisException("Element num of in predicate should not be more than " + maxAllowedInElementNumOfDelete); + } + for (int i = 1; i <= inPredicate.getInElementNum(); i++) { + Expr expr = inPredicate.getChild(i); + if (!(expr instanceof LiteralExpr)) { + throw new AnalysisException("Child of in predicate should be value"); + } + } + deleteConditions.add(inPredicate); } else { - throw new AnalysisException("Where clause should be compound or binary predicate"); + throw new AnalysisException("Where clause only supports compound predicate, binary predicate, is_null predicate or in predicate"); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index bb16fee6d86df6..6261ea149f05da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -1161,8 +1161,7 @@ public class Config extends ConfigBase { public static long min_clone_task_timeout_sec = 3 * 60; // 3min @ConfField(mutable = true, masterOnly = true) public static long max_clone_task_timeout_sec = 2 * 60 * 60; // 2h - - + /** * If set to true, fe will enable sql result cache * This option is suitable for offline data update scenarios @@ -1192,4 +1191,11 @@ public class Config extends ConfigBase { */ @ConfField(mutable = true, masterOnly = false) public static int cache_result_max_row_count = 3000; + + /** + * Used to limit element num of InPredicate in delete statement. + */ + @ConfField(mutable = true, masterOnly = true) + public static int max_allowed_in_element_num_of_delete = 1024; + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java index c12813a5ec239a..91a8006e0b3458 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java @@ -19,6 +19,7 @@ import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.DeleteStmt; +import org.apache.doris.analysis.InPredicate; import org.apache.doris.analysis.IsNullPredicate; import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.analysis.Predicate; @@ -164,7 +165,7 @@ public void process(DeleteStmt stmt) throws DdlException, QueryStateException { List deleteConditions = Lists.newArrayList(); // pre check - checkDeleteV2(olapTable, partition, conditions, deleteConditions, true); + checkDeleteV2(olapTable, partition, conditions, deleteConditions); // generate label String label = "delete_" + UUID.randomUUID(); @@ -450,7 +451,22 @@ public DeleteJob getDeleteJob(long transactionId) { return idToDeleteJob.get(transactionId); } - private void checkDeleteV2(OlapTable table, Partition partition, List conditions, List deleteConditions, boolean preCheck) + private SlotRef getSlotRef(Predicate condition) { + SlotRef slotRef = null; + if (condition instanceof BinaryPredicate) { + BinaryPredicate binaryPredicate = (BinaryPredicate) condition; + slotRef = (SlotRef) binaryPredicate.getChild(0); + } else if (condition instanceof IsNullPredicate) { + IsNullPredicate isNullPredicate = (IsNullPredicate) condition; + slotRef = (SlotRef) isNullPredicate.getChild(0); + } else if (condition instanceof InPredicate) { + InPredicate inPredicate = (InPredicate) condition; + slotRef = (SlotRef) inPredicate.getChild(0); + } + return slotRef; + } + + private void checkDeleteV2(OlapTable table, Partition partition, List conditions, List deleteConditions) throws DdlException { // check partition state @@ -466,14 +482,7 @@ private void checkDeleteV2(OlapTable table, Partition partition, List nameToColumn.put(column.getName(), column); } for (Predicate condition : conditions) { - SlotRef slotRef = null; - if (condition instanceof BinaryPredicate) { - BinaryPredicate binaryPredicate = (BinaryPredicate) condition; - slotRef = (SlotRef) binaryPredicate.getChild(0); - } else if (condition instanceof IsNullPredicate) { - IsNullPredicate isNullPredicate = (IsNullPredicate) condition; - slotRef = (SlotRef) isNullPredicate.getChild(0); - } + SlotRef slotRef = getSlotRef(condition); String columnName = slotRef.getColumnName(); if (!nameToColumn.containsKey(columnName)) { ErrorReport.reportDdlException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, table.getName()); @@ -507,7 +516,18 @@ private void checkDeleteV2(OlapTable table, Partition partition, List LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())); } catch (AnalysisException e) { // ErrorReport.reportDdlException(ErrorCode.ERR_INVALID_VALUE, value); - throw new DdlException("Invalid column value[" + value + "]"); + throw new DdlException("Invalid column value[" + value + "] for column " + columnName); + } + } else if (condition instanceof InPredicate) { + String value = null; + try { + InPredicate inPredicate = (InPredicate) condition; + for (int i = 1; i <= inPredicate.getInElementNum(); i++) { + value = ((LiteralExpr) inPredicate.getChild(i)).getStringValue(); + LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())); + } + } catch (AnalysisException e) { + throw new DdlException("Invalid column value[" + value + "] for column " + columnName); } } @@ -516,6 +536,10 @@ private void checkDeleteV2(OlapTable table, Partition partition, List } Map> indexIdToSchema = table.getIndexIdToSchema(); for (MaterializedIndex index : partition.getMaterializedIndices(MaterializedIndex.IndexExtState.VISIBLE)) { + if (table.getBaseIndexId() == index.getId()) { + continue; + } + // check table has condition column Map indexColNameToColumn = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); for (Column column : indexIdToSchema.get(index.getId())) { @@ -523,18 +547,11 @@ private void checkDeleteV2(OlapTable table, Partition partition, List } String indexName = table.getIndexNameById(index.getId()); for (Predicate condition : conditions) { - String columnName = null; - if (condition instanceof BinaryPredicate) { - BinaryPredicate binaryPredicate = (BinaryPredicate) condition; - columnName = ((SlotRef) binaryPredicate.getChild(0)).getColumnName(); - } else if (condition instanceof IsNullPredicate) { - IsNullPredicate isNullPredicate = (IsNullPredicate) condition; - columnName = ((SlotRef) isNullPredicate.getChild(0)).getColumnName(); - } + SlotRef slotRef = getSlotRef(condition); + String columnName = slotRef.getColumnName(); Column column = indexColNameToColumn.get(columnName); if (column == null) { - ErrorReport.reportDdlException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, - table.getBaseIndexId() == index.getId()? indexName : "index[" + indexName +"]"); + ErrorReport.reportDdlException(ErrorCode.ERR_BAD_FIELD_ERROR, columnName, "index[" + indexName +"]"); } MaterializedIndexMeta indexMeta = table.getIndexIdToMeta().get(index.getId()); if (indexMeta.getKeysType() != KeysType.DUP_KEYS && !column.isKey()) { @@ -569,6 +586,19 @@ private void checkDeleteV2(OlapTable table, Partition partition, List sb.append(" IS NULL"); } deleteConditions.add(sb.toString()); + } else if (condition instanceof InPredicate) { + InPredicate inPredicate = (InPredicate) condition; + SlotRef slotRef = (SlotRef) inPredicate.getChild(0); + String columnName = slotRef.getColumnName(); + StringBuilder strBuilder = new StringBuilder(); + String notStr = inPredicate.isNotIn() ? "NOT " : ""; + strBuilder.append(columnName).append(" ").append(notStr).append("IN ("); + for (int i = 1; i <= inPredicate.getInElementNum(); ++i) { + strBuilder.append(inPredicate.getChild(i).toSql()); + strBuilder.append((i != inPredicate.getInElementNum()) ? ", " : ""); + } + strBuilder.append(")"); + deleteConditions.add(strBuilder.toString()); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/PushTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/PushTask.java index 1d35130af2cf7a..7160a8c951271f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/PushTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/PushTask.java @@ -19,6 +19,7 @@ import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.BinaryPredicate.Operator; +import org.apache.doris.analysis.InPredicate; import org.apache.doris.analysis.IsNullPredicate; import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.analysis.Predicate; @@ -158,6 +159,15 @@ public TPushReq toThrift() { tCondition.setColumn_name(columnName); tCondition.setCondition_op(op); conditionValues.add(value); + } else if (condition instanceof InPredicate) { + InPredicate inPredicate = (InPredicate) condition; + String columnName = ((SlotRef) inPredicate.getChild(0)).getColumnName(); + String op = inPredicate.isNotIn() ? "!*=" : "*="; + tCondition.setColumn_name(columnName); + tCondition.setCondition_op(op); + for (int i = 1; i <= inPredicate.getInElementNum(); i++) { + conditionValues.add(((LiteralExpr)inPredicate.getChild(i)).getStringValue()); + } } tCondition.setCondition_values(conditionValues); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/DeleteStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/DeleteStmtTest.java index 30a2f7e352eb64..e89bfcf123b480 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/DeleteStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/DeleteStmtTest.java @@ -32,6 +32,8 @@ import mockit.Mocked; +import java.util.List; + public class DeleteStmtTest { Analyzer analyzer; @@ -73,7 +75,7 @@ public void testAnalyze() { try { deleteStmt.analyze(analyzer); } catch (UserException e) { - Assert.assertTrue(e.getMessage().contains("should be compound or binary predicate")); + Assert.assertTrue(e.getMessage().contains("Where clause only supports compound predicate, binary predicate, is_null predicate or in predicate")); } // case 2 @@ -102,7 +104,7 @@ public void testAnalyze() { try { deleteStmt.analyze(analyzer); } catch (UserException e) { - Assert.assertTrue(e.getMessage().contains("should be compound or binary predicate")); + Assert.assertTrue(e.getMessage().contains("Where clause only supports compound predicate, binary predicate, is_null predicate or in predicate")); } // case 4 @@ -117,7 +119,7 @@ public void testAnalyze() { try { deleteStmt.analyze(analyzer); } catch (UserException e) { - Assert.assertTrue(e.getMessage().contains("Right expr should be value")); + Assert.assertTrue(e.getMessage().contains("Right expr of binary predicate should be value")); } // case 5 @@ -132,7 +134,7 @@ public void testAnalyze() { try { deleteStmt.analyze(analyzer); } catch (UserException e) { - Assert.assertTrue(e.getMessage().contains("Left expr should be column name")); + Assert.assertTrue(e.getMessage().contains("Left expr of binary predicate should be column name")); } // case 6 partition is null @@ -152,10 +154,16 @@ public void testAnalyze() { // normal binaryPredicate = new BinaryPredicate(Operator.EQ, new SlotRef(null, "k1"), new StringLiteral("abc")); - CompoundPredicate compoundPredicate2 = + List inList = Lists.newArrayList(); + inList.add(new StringLiteral("2323")); + inList.add(new StringLiteral("1518")); + inList.add(new StringLiteral("5768")); + inList.add(new StringLiteral("6725")); + InPredicate inPredicate = new InPredicate(new SlotRef(null, "k2"), inList, true); + CompoundPredicate compoundPredicate2 = new CompoundPredicate(org.apache.doris.analysis.CompoundPredicate.Operator.AND, binaryPredicate, - binaryPredicate); + inPredicate); compoundPredicate = new CompoundPredicate(org.apache.doris.analysis.CompoundPredicate.Operator.AND, binaryPredicate, compoundPredicate2); diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 7b04ec648da88f..9b49f421aa0e72 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -181,6 +181,13 @@ enum KeysType { message DeletePredicatePB { required int32 version = 1; repeated string sub_predicates = 2; + repeated InPredicatePB in_predicates = 3; +} + +message InPredicatePB { + optional string column_name = 1; + optional bool is_not_in = 2; + repeated string values = 3; } message OLAPHeaderMessage {