Skip to content

Commit

Permalink
[fix](merge-on-write) incorrect result caused by key range filter wit…
Browse files Browse the repository at this point in the history
…h pk (apache#31456)
  • Loading branch information
liaoxin01 committed Mar 4, 2024
1 parent 02b6e30 commit bf56efc
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 12 deletions.
6 changes: 2 additions & 4 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1325,10 +1325,8 @@ Status SegmentIterator::_lookup_ordinal_from_pk_index(const RowCursor& key, bool
DCHECK(pk_index_reader != nullptr);

std::string index_key;
// when is_include is false, we should append KEY_NORMAL_MARKER to the
// encode key. Otherwise, we will get an incorrect upper bound.
encode_key_with_padding<RowCursor, true, true>(
&index_key, key, _segment->_tablet_schema->num_key_columns(), is_include, true);
encode_key_with_padding<RowCursor, true>(
&index_key, key, _segment->_tablet_schema->num_key_columns(), is_include);
if (index_key < _segment->min_key()) {
*rowid = 0;
return Status::OK();
Expand Down
17 changes: 9 additions & 8 deletions be/src/util/key_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,28 +52,29 @@ constexpr uint8_t KEY_NORMAL_MARKER = 0x02;
constexpr uint8_t KEY_NULL_LAST_MARKER = 0xFE;
// Used to represent maximal value for that field
constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;
// Used to represent a value greater than the normal marker by 1, used by MoW
constexpr uint8_t KEY_NORMAL_NEXT_MARKER = 0x03;

// Encode one row into binary according to the given num_keys.
// A cell will be encoded in the format of a marker and encoded content.
// When encoding a row, if any cell isn't found in the row, this function will
// fill a marker and return. If padding_minimal is true, KEY_MINIMAL_MARKER will
// be added, if padding_minimal is false, KEY_MAXIMAL_MARKER will be added.
// If all num_keys are found in row, no marker will be added.
// if padding_minimal is false and padding_normal_marker is true,
// KEY_NORMAL_MARKER will be added.
template <typename RowType, bool null_first = true, bool full_encode = false>
template <typename RowType, bool is_mow = false>
void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys,
bool padding_minimal, bool padding_normal_marker = false) {
bool padding_minimal) {
for (auto cid = 0; cid < num_keys; cid++) {
auto field = row.schema()->column(cid);
if (field == nullptr) {
if (padding_minimal) {
buf->push_back(KEY_MINIMAL_MARKER);
} else {
if (padding_normal_marker) {
buf->push_back(KEY_NORMAL_MARKER);
if (is_mow) {
buf->push_back(KEY_NORMAL_NEXT_MARKER);
} else {
buf->push_back(KEY_MAXIMAL_MARKER);
}
buf->push_back(KEY_MAXIMAL_MARKER);
}
break;
}
Expand All @@ -88,7 +89,7 @@ void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_ke
continue;
}
buf->push_back(KEY_NORMAL_MARKER);
if (full_encode) {
if (is_mow) {
field->full_encode_ascending(cell.cell_ptr(), buf);
} else {
field->encode_ascending(cell.cell_ptr(), buf);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !pk_key_range --
2024-02-18 \N -4
2024-02-18 \N 9
2024-02-18 -10 -10
2024-02-18 -10 -10
2024-02-18 -10 -4
2024-02-18 -10 5
2024-02-18 -10 9
2024-02-18 -4 -10
2024-02-18 -4 -10
2024-02-18 0 4
2024-02-18 0 5
2024-02-18 0 6
2024-02-18 1 6
2024-02-18 2 9
2024-02-18 3 9
2024-02-18 5 4
2024-02-18 8 2
2024-02-18 9 1
2024-02-18 9 9

Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,46 @@ suite("test_primary_key_simple_case") {
result = sql """ SELECT * FROM ${tableName} t ORDER BY user_id; """
assertTrue(result.size() == 7)
assertTrue(result[6][10] == 25)

sql """ DROP TABLE IF EXISTS test_unique_key_range_tbl """
sql """
create table test_unique_key_range_tbl (
k1 date not null,
k2 bigint not null,
v1 int null,
v2 int not null
) UNIQUE KEY(`k1`, `k2`)
DISTRIBUTED BY HASH(`k2`) BUCKETS 30
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"enable_unique_key_merge_on_write" = "true"
);
"""

sql """
insert into test_unique_key_range_tbl values
( '2024-02-18' , -7822995176885966013 , -10 , -4 ),
( '2024-02-18' , -5987215688096912139 , 8 , 2 ),
( '2024-02-18' , -5889932400568797810 , -10 , -10 ),
( '2024-02-18' , -5051784705055344649 , 1 , 6 ),
( '2024-02-18' , -4635608137995832373 , 3 , 9 ),
( '2024-02-18' , -3836821172182966892 , -10 , -10 ),
( '2024-02-18' , -3675645188438967877 , NULL , -4 ),
( '2024-02-18' , -3363157164254363034 , 5 , 4 ),
( '2024-02-18' , -849169574767655353 , -4 , -10 ),
( '2024-02-18' , -293023807696575395 , NULL , 9 ),
( '2024-02-18' , 1167104788249072527 , 0 , 4 ),
( '2024-02-18' , 1660707941299238025 , 9 , 9 ),
( '2024-02-18' , 2852819493813807984 , 0 , 6 ),
( '2024-02-18' , 5444305694667795860 , 9 , 1 ),
( '2024-02-18' , 6136152292926889790 , 2 , 9 ),
( '2024-02-18' , 6538123407677174537 , -4 , -10 ),
( '2024-02-18' , 7958269158967938474 , -10 , 9 ),
( '2024-02-18' , 9019386549208004184 , -10 , 5 ),
( '2024-02-18' , 9208781524087970597 , 0 , 5 );
"""

qt_pk_key_range """
select k1, v1, v2 from test_unique_key_range_tbl where k1 = '2024-02-18' order by 1, 2, 3;
"""
}

0 comments on commit bf56efc

Please sign in to comment.