From 9fefa2cb833bc2a85852c17eba92778711b2ccbc Mon Sep 17 00:00:00 2001 From: lihaopeng Date: Fri, 13 May 2022 17:03:10 +0800 Subject: [PATCH] [Bug][Vectorized] Fix BE crash with delete condition and enable_storage_vectorization --- .../rowset/segment_v2/segment_iterator.cpp | 41 +++++++++++++------ regression-test/data/delete/test_delete.out | 16 ++++++++ .../suites/delete/test_delete.groovy | 17 +++++++- 3 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 regression-test/data/delete/test_delete.out diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index ec2093e2f87445..c8dbc448919847 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -797,7 +797,9 @@ void SegmentIterator::_init_current_block( auto cid = _schema.column_id(i); auto column_desc = _schema.column(cid); - if (_is_pred_column[cid]) { //todo(wb) maybe we can release it after output block + // predicate columns and columns not in the block must be clear()ed here to insert new data + if (_is_pred_column[cid] || + i >= block->columns()) { //todo(wb) maybe we can release it after output block current_columns[cid]->clear(); } else { // non-predicate column current_columns[cid] = std::move(*block->get_by_position(i).column).mutate(); @@ -815,8 +817,12 @@ void SegmentIterator::_init_current_block( void SegmentIterator::_output_non_pred_columns(vectorized::Block* block) { SCOPED_RAW_TIMER(&_opts.stats->output_col_ns); for (auto cid : _non_predicate_columns) { - block->replace_by_position(_schema_block_id_map[cid], - std::move(_current_return_columns[cid])); + auto loc = _schema_block_id_map[cid]; + // if loc >= block->columns(), the column is a delete condition column and should + // not be output by the block, so just skip the column. 
+ if (loc < block->columns()) { + block->replace_by_position(loc, std::move(_current_return_columns[cid])); + } } } @@ -950,15 +956,24 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { _block_rowids.resize(_opts.block_row_max); } _current_return_columns.resize(_schema.columns().size()); - if (_is_need_vec_eval || _is_need_short_eval) { - for (size_t i = 0; i < _schema.num_column_ids(); i++) { - auto cid = _schema.column_id(i); - if (_is_pred_column[cid]) { - auto column_desc = _schema.column(cid); - _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr( - column_desc->type(), column_desc->is_nullable()); - _current_return_columns[cid]->reserve(_opts.block_row_max); - } + for (size_t i = 0; i < _schema.num_column_ids(); i++) { + auto cid = _schema.column_id(i); + auto column_desc = _schema.column(cid); + if (_is_pred_column[cid]) { + _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr( + column_desc->type(), column_desc->is_nullable()); + _current_return_columns[cid]->reserve(_opts.block_row_max); + } else if (i >= block->columns()) { + // if i >= block->columns() and the column is not a pred_column, then `column i` is + // a delete condition column, but the condition is not effective in this segment, so we just + // create a column to hold the data. + // a. origin data -> b. delete condition -> c. new load data + // the delete condition is not effective for the segment of c, but it still needs to read the column + // to match the schema. + // TODO: skip reading the ineffective delete column to speed up segment read. 
+ _current_return_columns[cid] = + Schema::get_data_type_ptr(*column_desc)->create_column(); + _current_return_columns[cid]->reserve(_opts.block_row_max); } } } @@ -973,7 +988,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { _opts.stats->raw_rows_read += nrows_read; if (nrows_read == 0) { - for (int i = 0; i < _schema.num_column_ids(); i++) { + for (int i = 0; i < block->columns(); i++) { auto cid = _schema.column_id(i); // todo(wb) abstract make column where if (!_is_pred_column[cid]) { // non-predicate diff --git a/regression-test/data/delete/test_delete.out b/regression-test/data/delete/test_delete.out new file mode 100644 index 00000000000000..5dbc5223f67019 --- /dev/null +++ b/regression-test/data/delete/test_delete.out @@ -0,0 +1,16 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +8 + +-- !sql -- +8 + +-- !sql -- +8 + +-- !sql -- +8 + +-- !sql -- +8 + diff --git a/regression-test/suites/delete/test_delete.groovy b/regression-test/suites/delete/test_delete.groovy index 7fd9bc6f4bb025..97dfab32bb6c84 100644 --- a/regression-test/suites/delete/test_delete.groovy +++ b/regression-test/suites/delete/test_delete.groovy @@ -16,6 +16,19 @@ // under the License. 
suite("test_delete", "delete") { - // todo: test delete - sql "show delete" + def tableName = "delete_regression_test" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} (c1 varchar(190) NOT NULL COMMENT "",c2 bigint(20) NOT NULL COMMENT "", c3 varchar(160) NULL COMMENT "" ) ENGINE=OLAP DUPLICATE KEY(c1, c2) COMMENT "OLAP" DISTRIBUTED BY HASH(c3) BUCKETS 3 + PROPERTIES ( "replication_num" = "1" );""" + + sql """INSERT INTO ${tableName} VALUES ('abcdef',1,'fjdsajfldjafljdslajfdl'),('abcdef',2,'fjdsajfldjafljdslajfdl'),('abcdef',4,'fjdsajfldjafljdslajfdl'),('abcdef',5,'fjdsajfldjafljdslajfdl')""" + sql """delete from ${tableName} where c1 = 'fjdsajfldjafljdslajfdl';""" + sql """INSERT INTO ${tableName} VALUES ('abcdef',1,'fjdsajfldjafljdslajfdl'),('abcdef',2,'fjdsajfldjafljdslajfdl'),('abcdef',4,'fjdsajfldjafljdslajfdl'),('abcdef',5,'fjdsajfldjafljdslajfdl');""" + + qt_sql """select count(*) from ${tableName};""" + qt_sql """select count(c2) from ${tableName};""" + qt_sql """select count(c2) from ${tableName} where c1 = 'abcdef';""" + qt_sql """select count(c1) from ${tableName};""" + qt_sql """select count(c1) from ${tableName} where c1 = 'abcdef';""" }