Skip to content

Commit

Permalink
[Pick 2.0](segment iterator) fix shrink non-char column coredump (#36466)
Browse files Browse the repository at this point in the history

## Proposed changes
Pick from (#36275)
  • Loading branch information
airborne12 authored Jun 18, 2024
1 parent f4dbf83 commit c3d4998
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 9 deletions.
20 changes: 12 additions & 8 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,6 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) {
}

RETURN_IF_ERROR(init_iterators());
if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
_vec_init_char_column_id();
}

if (opts.output_columns != nullptr) {
_output_columns = *(opts.output_columns);
Expand Down Expand Up @@ -1676,15 +1673,19 @@ bool SegmentIterator::_has_char_type(const Field& column_desc) {
}
};

void SegmentIterator::_vec_init_char_column_id() {
void SegmentIterator::_vec_init_char_column_id(vectorized::Block* block) {
for (size_t i = 0; i < _schema->num_column_ids(); i++) {
auto cid = _schema->column_id(i);
const Field* column_desc = _schema->column(cid);

if (_has_char_type(*column_desc)) {
_char_type_idx.emplace_back(i);
if (i != 0) {
_char_type_idx_no_0.emplace_back(i);
// The extra delete-condition filter column is materialized at the end of the block and is
// erased after _output_column_by_sel_idx, so it never needs to be shrunk. Only record
// columns whose index actually falls inside the block.
if (i < block->columns()) {
if (_has_char_type(*column_desc)) {
_char_type_idx.emplace_back(i);
if (i != 0) {
_char_type_idx_no_0.emplace_back(i);
}
}
}
}
Expand Down Expand Up @@ -2037,6 +2038,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
_block_rowids.resize(_opts.block_row_max);
}
_current_return_columns.resize(_schema->columns().size());
if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) {
_vec_init_char_column_id(block);
}
for (size_t i = 0; i < _schema->num_column_ids(); i++) {
auto cid = _schema->column_id(i);
auto column_desc = _schema->column(cid);
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ class SegmentIterator : public RowwiseIterator {
// TODO: Fix Me
// The storage layer pads CHAR values with 0 up to the column length, but the query engine must ignore that padding,
// so the segment iterator needs to shrink CHAR columns before outputting them. Only used in the vectorized query engine.
void _vec_init_char_column_id();
void _vec_init_char_column_id(vectorized::Block* block);
bool _has_char_type(const Field& column_desc);

uint32_t segment_id() const { return _segment->id(); }
Expand Down
4 changes: 4 additions & 0 deletions regression-test/data/inverted_index_p0/test_delete.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
2

60 changes: 60 additions & 0 deletions regression-test/suites/inverted_index_p0/test_delete.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite: DELETE on a CHAR column of a table with inverted indexes,
// followed by a filtered count.
//
// Scenario:
//   1. Create a DUPLICATE KEY table with an int column `a`, a varchar column `b`
//      and a char(10) column `c`; `b` and `c` both carry inverted indexes.
//   2. Insert seven rows.
//   3. Delete the rows whose `c` is 'aaa' or 'ccc'.
//   4. Count the rows matching a range predicate on `a` and an IN predicate on `b`.
//
// The result of step 4 is checked by `qt_sql` against the recorded expected
// output (tag `sql`) for this suite.
// NOTE(review): presumably this guards the segment-iterator "shrink non-char
// column" coredump fixed in #36466 -- confirm against the commit description.
suite("test_delete"){
    // Table used by every statement below.
    def indexTblName = "test_delete"

    // Start from a clean slate so the suite can be re-run.
    sql "DROP TABLE IF EXISTS ${indexTblName}"
    // create 1 replica table
    sql """
CREATE TABLE `${indexTblName}` (
`a` int NULL COMMENT '',
`b` varchar(60) NOT NULL COMMENT '',
`c` char(10) NULL COMMENT '',
INDEX index_b(b) USING INVERTED COMMENT '',
INDEX index_c(c) USING INVERTED COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`a`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`a`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);
"""

    // Seven rows; `c` values 'aaa' .. 'ggg' map one-to-one onto `a` values 1 .. 7.
    sql """ INSERT INTO `${indexTblName}`(`a`, `b`, `c`) VALUES ('1', '6afef581285b6608bf80d5a4e46cf839', 'aaa'), ('2', '48a33ec3453a28bce84b8f96fe161956', 'bbb'),
('3', '021603e7dcfe65d44af0efd0e5aee154', 'ccc'), ('4', 'ee27ee1da291e46403c408e220bed6e1', 'ddd'),
('5', 'a648a447b8f71522f11632eba4b4adde', 'eee'), ('6', 'a9fb5c985c90bf05f3bee5ca3ae95260', 'fff'),
('7', '0974e7a82e30d1af83205e474fadd0a2', 'ggg'); """


    // Delete by the CHAR column: removes the rows with a = 1 and a = 3.
    sql """ DELETE FROM ${indexTblName} WHERE c IN ('aaa','ccc'); """

    // Both `b` values listed here belong to rows that were not deleted (a = 2 and a = 6).
    qt_sql """ SELECT count(1) as cnt FROM ${indexTblName} WHERE a BETWEEN 1 AND 6 AND b IN ('48a33ec3453a28bce84b8f96fe161956', 'a9fb5c985c90bf05f3bee5ca3ae95260'); """
}

0 comments on commit c3d4998

Please sign in to comment.