Skip to content

Commit

Permalink
[fix](storage) low_cardinality_optimize core dump when is null predic…
Browse files Browse the repository at this point in the history
…ate (#9586)

Issue Number: close #9555
Make the last value of the dictionary null. When ColumnDict inserts a null value,
add the encoding corresponding to the last value of the dictionary.
  • Loading branch information
zenoyang authored and morningman committed May 18, 2022
1 parent 4315dc8 commit 0562bab
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 18 deletions.
7 changes: 5 additions & 2 deletions be/src/olap/comparison_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)

// todo(zeno) define interface in IColumn to simplify code
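// The two constant expressions passed to find_code_by_bound describe the operator:
//   `1 OP 0` is true when the predicate is > or >=
//   `1 OP 1` is true when the predicate is >= or <=
// Deriving both flags from OP lets one macro cover every range predicate without redundant code.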
#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \
template <class T> \
void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \
Expand All @@ -164,7 +167,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
auto& data_array = nested_col_ptr->get_data(); \
auto dict_code = \
IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1) \
: nested_col_ptr->find_code(_value); \
for (uint16_t i = 0; i < *size; i++) { \
uint16_t idx = sel[i]; \
Expand Down Expand Up @@ -192,7 +195,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
auto& dict_col = \
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\
auto& data_array = dict_col.get_data(); \
auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1) \
: dict_col.find_code(_value); \
for (uint16_t i = 0; i < *size; ++i) { \
uint16_t idx = sel[i]; \
Expand Down
30 changes: 14 additions & 16 deletions be/src/vec/columns/column_dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,10 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
}

void insert_data(const char* pos, size_t /*length*/) override {
_codes.push_back(unaligned_load<T>(pos));
LOG(FATAL) << "insert_data not supported in ColumnDictionary";
}

void insert_data(const T value) { _codes.push_back(value); }

void insert_default() override { _codes.push_back(T()); }
void insert_default() override { _codes.push_back(_dict.get_null_code()); }

void clear() override {
_codes.clear();
Expand Down Expand Up @@ -218,13 +216,12 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
void insert_many_dict_data(const int32_t* data_array, size_t start_index,
const StringRef* dict_array, size_t data_num,
uint32_t dict_num) override {
if (!is_dict_inited()) {
if (_dict.empty()) {
_dict.reserve(dict_num);
for (uint32_t i = 0; i < dict_num; ++i) {
auto value = StringValue(dict_array[i].data, dict_array[i].size);
_dict.insert_value(value);
}
_dict_inited = true;
}

char* end_ptr = (char*)_codes.get_end_ptr();
Expand Down Expand Up @@ -266,8 +263,6 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
return _dict.find_codes(values);
}

bool is_dict_inited() const { return _dict_inited; }

bool is_dict_sorted() const { return _dict_sorted; }

bool is_dict_code_converted() const { return _dict_code_converted; }
Expand Down Expand Up @@ -304,13 +299,17 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
if (it != _inverted_index.end()) {
return it->second;
}
return -1;
return -2; // -1 is null code
}

T get_null_code() { return -1; }

inline StringValue& get_value(T code) {
return code >= _dict_data.size() ? _null_value : _dict_data[code];
}

inline StringValue& get_value(T code) { return _dict_data[code]; }

inline void generate_hash_values() {
if (_hash_values.size() == 0) {
if (_hash_values.empty()) {
_hash_values.resize(_dict_data.size());
for (size_t i = 0; i < _dict_data.size(); i++) {
auto& sv = _dict_data[i];
Expand Down Expand Up @@ -387,7 +386,10 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {

size_t byte_size() { return _dict_data.size() * sizeof(_dict_data[0]); }

bool empty() { return _dict_data.empty(); }

private:
StringValue _null_value = StringValue();
StringValue::Comparator _comparator;
// dict code -> dict value
DictContainer _dict_data;
Expand All @@ -405,16 +407,12 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {

private:
size_t _reserve_size;
bool _dict_inited = false;
bool _dict_sorted = false;
bool _dict_code_converted = false;
Dictionary _dict;
Container _codes;
};

template class ColumnDictionary<uint8_t>;
template class ColumnDictionary<uint16_t>;
template class ColumnDictionary<uint32_t>;
template class ColumnDictionary<int32_t>;

using ColumnDictI32 = vectorized::ColumnDictionary<doris::vectorized::Int32>;
Expand Down

0 comments on commit 0562bab

Please sign in to comment.