Skip to content

Commit

Permalink
[Fix](segment iterator) fix shrink non-char column coredump (apache#3…
Browse files Browse the repository at this point in the history
…6275)

If a delete predicate has been applied to a table with an inverted index
and the table is then queried, the query causes a core dump in
shrink_char_type_column_suffix_zero. This occurs because the index recorded
in _char_type_idx for the delete predicate column incorrectly ends up
referring to the result column inserted by the inverted index evaluation.

The core dump stack trace looks like this:
```
COULD NOT CREATE A LOGGINGFILE 20240604-145331!F20240604 14:53:31.991016 24178 column.h:134] Cannot get_shrinked_column() column Const(UInt8)
*** Check failure stack trace: ***
F20240604 14:53:31.991436 24158 column.h:134] Cannot get_shrinked_column() column Const(UInt8)
*** Check failure stack trace: ***
F20240604 14:53:31.991436 24158 column.h:134] Cannot get_shrinked_column() column Const(UInt8)F20240604 14:53:31.991645 24160 column.h:134] Cannot get_shrinked_column() column Const(UInt8)

    at /home/zcp/repo_center/doris_release/doris/be/src/vec/core/block.cpp:1126
    at /home/zcp/repo_center/doris_release/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2408
    at /home/zcp/repo_center/doris_release/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2090
    at /home/zcp/repo_center/doris_release/doris/be/src/olap/rowset/beta_rowset_reader.cpp:342
    at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.h:256
    at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.cpp:514
    at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.cpp:493
    at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.cpp:692
    at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.cpp:186
    at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/block_reader.cpp:156
    at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/block_reader.cpp:228
--Type <RET> for more, q to quit, c to continue without paging--c
    at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:227
```
  • Loading branch information
airborne12 committed Jun 18, 2024
1 parent f4dbf83 commit 63d9add
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 9 deletions.
23 changes: 15 additions & 8 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,6 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) {
}

RETURN_IF_ERROR(init_iterators());
if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
_vec_init_char_column_id();
}

if (opts.output_columns != nullptr) {
_output_columns = *(opts.output_columns);
Expand Down Expand Up @@ -1676,15 +1673,22 @@ bool SegmentIterator::_has_char_type(const Field& column_desc) {
}
};

void SegmentIterator::_vec_init_char_column_id() {
void SegmentIterator::_vec_init_char_column_id(vectorized::Block* block) {
for (size_t i = 0; i < _schema->num_column_ids(); i++) {
auto cid = _schema->column_id(i);
const Field* column_desc = _schema->column(cid);

if (_has_char_type(*column_desc)) {
_char_type_idx.emplace_back(i);
if (i != 0) {
_char_type_idx_no_0.emplace_back(i);
// The additional deleted filter condition will be in the materialized column at the end of the block.
// After _output_column_by_sel_idx, it will be erased, so we do not need to shrink it.
if (i < block->columns()) {
if (_has_char_type(*column_desc)) {
_char_type_idx.emplace_back(i);
if (i != 0) {
_char_type_idx_no_0.emplace_back(i);
}
}
if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
_is_char_type[cid] = true;
}
}
}
Expand Down Expand Up @@ -2037,6 +2041,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_block_rowids.resize(_opts.block_row_max);
}
_current_return_columns.resize(_schema->columns().size());
if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
_vec_init_char_column_id(block);
}
for (size_t i = 0; i < _schema->num_column_ids(); i++) {
auto cid = _schema->column_id(i);
auto column_desc = _schema->column(cid);
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ class SegmentIterator : public RowwiseIterator {
// TODO: Fix Me
// The storage layer pads CHAR values with 0 up to their length, but the query engine needs to ignore that padding,
// so the segment iterator needs to shrink CHAR columns before outputting them. Only used by the vectorized query engine.
void _vec_init_char_column_id();
void _vec_init_char_column_id(vectorized::Block* block);
bool _has_char_type(const Field& column_desc);

uint32_t segment_id() const { return _segment->id(); }
Expand Down
4 changes: 4 additions & 0 deletions regression-test/data/inverted_index_p0/test_delete.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
2

60 changes: 60 additions & 0 deletions regression-test/suites/inverted_index_p0/test_delete.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_delete"){
    // Regression test: run a DELETE whose predicate is on a CHAR column of a
    // table with inverted indexes, then run a filtered query on the same table.
    // The query must complete and return the expected count.

    // Name of the table that this suite drops, creates, populates and queries.
    def indexTblName = "test_delete"

    sql "DROP TABLE IF EXISTS ${indexTblName}"
    // Create a single-replica table with inverted indexes on the VARCHAR
    // column `b` and the CHAR column `c`.
    sql """
CREATE TABLE `${indexTblName}` (
`a` int NULL COMMENT '',
`b` varchar(60) NOT NULL COMMENT '',
`c` char(10) NULL COMMENT '',
INDEX index_b(b) USING INVERTED COMMENT '',
INDEX index_c(c) USING INVERTED COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`a`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`a`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);
"""

    // Seed seven rows; `c` takes the values 'aaa' .. 'ggg'.
    sql """ INSERT INTO `${indexTblName}`(`a`, `b`, `c`) VALUES ('1', '6afef581285b6608bf80d5a4e46cf839', 'aaa'), ('2', '48a33ec3453a28bce84b8f96fe161956', 'bbb'),
('3', '021603e7dcfe65d44af0efd0e5aee154', 'ccc'), ('4', 'ee27ee1da291e46403c408e220bed6e1', 'ddd'),
('5', 'a648a447b8f71522f11632eba4b4adde', 'eee'), ('6', 'a9fb5c985c90bf05f3bee5ca3ae95260', 'fff'),
('7', '0974e7a82e30d1af83205e474fadd0a2', 'ggg'); """

    // Delete rows 1 and 3 through a predicate on the CHAR column.
    sql """ DELETE FROM ${indexTblName} WHERE c IN ('aaa','ccc'); """

    // Filter on `a` and on the indexed column `b`. The two `b` values listed
    // belong to rows 2 and 6, neither of which was deleted, so the count is 2.
    // NOTE(review): presumably qt_sql compares the result with the recorded
    // test_delete.out file -- confirm against the regression framework.
    qt_sql """ SELECT count(1) as cnt FROM ${indexTblName} WHERE a BETWEEN 1 AND 6 AND b IN ('48a33ec3453a28bce84b8f96fe161956', 'a9fb5c985c90bf05f3bee5ca3ae95260'); """
}

0 comments on commit 63d9add

Please sign in to comment.