Skip to content

Commit

Permalink
pick [opt](merge-on-write) avoid to check delete bitmap while lookup …
Browse files Browse the repository at this point in the history
…rowkey in some situation to reduce CPU cost (apache#41480) (apache#41439)

Issue Number: close #xxx

cherry-pick apache#41480
  • Loading branch information
zhannngchen authored and bobhan1 committed Oct 15, 2024
1 parent fe069fd commit ccf869c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 15 deletions.
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ Status SegmentWriter::probe_key_for_mow(
RowsetSharedPtr rowset;
auto st = tablet->lookup_row_key(key, _tablet_schema.get(), have_input_seq_column,
specified_rowsets, &loc, _mow_context->max_version,
segment_caches, &rowset);
segment_caches, &rowset, true, true);
if (st.is<KEY_NOT_FOUND>()) {
if (_opts.rowset_ctx->partial_update_info->is_strict_mode) {
++stats.num_rows_filtered;
Expand Down Expand Up @@ -867,7 +867,7 @@ Status SegmentWriter::merge_rows_for_sequence_column(

st = tablet->lookup_row_key(key, _tablet_schema.get(), false, specified_rowsets, &loc,
_mow_context->max_version, segment_caches, &rowset, true,
&previous_encoded_seq_value);
true, &previous_encoded_seq_value);
DCHECK(st.is<KEY_NOT_FOUND>() || st.ok());

Slice previous_seq_slice {};
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ Status VerticalSegmentWriter::_probe_key_for_mow(
RowsetSharedPtr rowset;
auto st = tablet->lookup_row_key(key, _tablet_schema.get(), have_input_seq_column,
specified_rowsets, &loc, _mow_context->max_version,
segment_caches, &rowset);
segment_caches, &rowset, true, true);
if (st.is<KEY_NOT_FOUND>()) {
if (_opts.rowset_ctx->partial_update_info->is_strict_mode) {
++stats.num_rows_filtered;
Expand Down Expand Up @@ -860,7 +860,7 @@ Status VerticalSegmentWriter::_merge_rows_for_sequence_column(
std::string previous_encoded_seq_value {};
st = tablet->lookup_row_key(key, _tablet_schema.get(), false, specified_rowsets, &loc,
_mow_context->max_version, segment_caches, &rowset, true,
&previous_encoded_seq_value);
true, &previous_encoded_seq_value);
DCHECK(st.is<KEY_NOT_FOUND>() || st.ok());

Slice previous_seq_slice {};
Expand Down
27 changes: 17 additions & 10 deletions be/src/olap/tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2856,7 +2856,7 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest_sch
const std::vector<RowsetSharedPtr>& specified_rowsets,
RowLocation* row_location, uint32_t version,
std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches,
RowsetSharedPtr* rowset, bool with_rowid,
RowsetSharedPtr* rowset, bool with_rowid, bool is_partial_update,
std::string* encoded_seq_value) {
SCOPED_BVAR_LATENCY(g_tablet_lookup_rowkey_latency);
size_t seq_col_length = 0;
Expand All @@ -2874,6 +2874,8 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest_sch
Slice(encoded_key.get_data(), encoded_key.get_size() - seq_col_length - rowid_length);
RowLocation loc;

bool need_to_check_delete_bitmap = is_partial_update || with_seq_col;

for (size_t i = 0; i < specified_rowsets.size(); i++) {
auto& rs = specified_rowsets[i];
auto& segments_key_bounds = rs->rowset_meta()->get_segments_key_bounds();
Expand Down Expand Up @@ -2912,15 +2914,19 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, TabletSchema* latest_sch
if (!s.ok() && !s.is<KEY_ALREADY_EXISTS>()) {
return s;
}
if (s.ok() && _tablet_meta->delete_bitmap().contains_agg_without_cache(
{loc.rowset_id, loc.segment_id, version}, loc.row_id)) {
// if there is a sequence column, we continue to compare the sequence_id of
// all rowsets until we find an existing key.
if (schema->has_sequence_col()) {
continue;
if (s.ok() && need_to_check_delete_bitmap) {
// check if the key is already marked as deleted
if (_tablet_meta->delete_bitmap().contains_agg_without_cache(
{loc.rowset_id, loc.segment_id, version}, loc.row_id)) {
// if there is a sequence column, we continue to compare the sequence_id of
// all rowsets until we find an existing key.
if (with_seq_col) {
continue;
}
// The key is deleted, we need to break the loop and return
// KEY_NOT_FOUND.
break;
}
// The key is deleted, we don't need to search for it any more.
break;
}
// `s` is either OK or KEY_ALREADY_EXISTS now.
// for partial update, even if the key already exists, we still need to
Expand Down Expand Up @@ -3097,7 +3103,8 @@ Status Tablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,

RowsetSharedPtr rowset_find;
auto st = lookup_row_key(key, rowset_schema.get(), true, specified_rowsets, &loc,
dummy_version.first - 1, segment_caches, &rowset_find);
dummy_version.first - 1, segment_caches, &rowset_find, false,
is_partial_update);
bool expected_st = st.ok() || st.is<KEY_NOT_FOUND>() || st.is<KEY_ALREADY_EXISTS>();
// It's a defensive DCHECK; we need to exclude some common errors to avoid core dumps
// during stress tests
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ class Tablet final : public BaseTablet {
RowLocation* row_location, uint32_t version,
std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches,
RowsetSharedPtr* rowset = nullptr, bool with_rowid = true,
std::string* encoded_seq_value = nullptr);
bool is_partial_update = false, std::string* encoded_seq_value = nullptr);

// Lookup a row with TupleDescriptor and fill Block
Status lookup_row_data(const Slice& encoded_key, const RowLocation& row_location,
Expand Down

0 comments on commit ccf869c

Please sign in to comment.