Skip to content

Commit

Permalink
[fix](memory) Fix init segment map check memory exceeds limit (apache…
Browse files Browse the repository at this point in the history
…#44092)

### What problem does this PR solve?

Problem Summary:

`RowIdConversion::init_segment_map` may use a lot of memory during
compaction. However, this memory is not controlled, so the process
may crash with an OOM.

If the process has less than 10 MB of available memory before generating
the `rowid_map` for each segment in `init_segment_map`, the function stops
and returns a `MemoryLimitExceeded` error instead of allocating further.


![image](https://github.com/user-attachments/assets/652fadc9-bca0-4edd-a03e-04f2546cdf76)
  • Loading branch information
xinyiZzz committed Dec 24, 2024
1 parent aa728ae commit 745dff9
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 13 deletions.
30 changes: 20 additions & 10 deletions be/src/olap/rowid_conversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,33 @@ class RowIdConversion {
~RowIdConversion() { RELEASE_THREAD_MEM_TRACKER(_seg_rowid_map_mem_used); }

// resize segment rowid map to its rows num
void init_segment_map(const RowsetId& src_rowset_id, const std::vector<uint32_t>& num_rows) {
size_t delta_std_pair_cap = 0;
Status init_segment_map(const RowsetId& src_rowset_id, const std::vector<uint32_t>& num_rows) {
for (size_t i = 0; i < num_rows.size(); i++) {
constexpr size_t RESERVED_MEMORY = 10 * 1024 * 1024; // 10M
if (doris::GlobalMemoryArbitrator::is_exceed_hard_mem_limit(RESERVED_MEMORY)) {
return Status::MemoryLimitExceeded(fmt::format(
"RowIdConversion init_segment_map failed, memory exceed limit, {}, "
"consuming "
"tracker:<{}>, peak used {}, current used {}.",
doris::GlobalMemoryArbitrator::process_limit_exceeded_errmsg_str(),
doris::thread_context()->thread_mem_tracker()->label(),
doris::thread_context()->thread_mem_tracker()->peak_consumption(),
doris::thread_context()->thread_mem_tracker()->consumption()));
}

uint32_t id = _segments_rowid_map.size();
_segment_to_id_map.emplace(std::pair<RowsetId, uint32_t> {src_rowset_id, i}, id);
_id_to_segment_map.emplace_back(src_rowset_id, i);
std::vector<std::pair<uint32_t, uint32_t>> vec(
num_rows[i], std::pair<uint32_t, uint32_t>(UINT32_MAX, UINT32_MAX));
delta_std_pair_cap += vec.capacity();

//NOTE: manually count _segments_rowid_map's memory here, because _segments_rowid_map could be used by indexCompaction.
// indexCompaction is a thridparty code, it's too complex to modify it.
// refer compact_column.
track_mem_usage(vec.capacity());
_segments_rowid_map.emplace_back(std::move(vec));
}
//NOTE: manually count _segments_rowid_map's memory here, because _segments_rowid_map could be used by indexCompaction.
// indexCompaction is a thridparty code, it's too complex to modify it.
// refer compact_column.
track_mem_usage(delta_std_pair_cap);
return Status::OK();
}

// set dst rowset id
Expand Down Expand Up @@ -124,9 +136,7 @@ class RowIdConversion {
size_t new_size =
_std_pair_cap * sizeof(std::pair<uint32_t, uint32_t>) +
_segments_rowid_map.capacity() * sizeof(std::vector<std::pair<uint32_t, uint32_t>>);

RELEASE_THREAD_MEM_TRACKER(_seg_rowid_map_mem_used);
CONSUME_THREAD_MEM_TRACKER(new_size);
CONSUME_THREAD_MEM_TRACKER(new_size - _seg_rowid_map_mem_used);
_seg_rowid_map_mem_used = new_size;
}

Expand Down
3 changes: 2 additions & 1 deletion be/src/olap/rowset/beta_rowset_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
// init segment rowid map for rowid conversion
std::vector<uint32_t> segment_num_rows;
RETURN_IF_ERROR(get_segment_num_rows(&segment_num_rows));
_read_context->rowid_conversion->init_segment_map(rowset()->rowset_id(), segment_num_rows);
RETURN_IF_ERROR(_read_context->rowid_conversion->init_segment_map(rowset()->rowset_id(),
segment_num_rows));
}

auto [seg_start, seg_end] = _segment_offsets;
Expand Down
6 changes: 4 additions & 2 deletions be/test/olap/rowid_conversion_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,10 +495,12 @@ TEST_F(TestRowIdConversion, Basic) {
RowIdConversion rowid_conversion;
src_rowset.init(0);
std::vector<uint32_t> rs0_segment_num_rows = {4, 3};
rowid_conversion.init_segment_map(src_rowset, rs0_segment_num_rows);
auto st = rowid_conversion.init_segment_map(src_rowset, rs0_segment_num_rows);
EXPECT_EQ(st.ok(), true);
src_rowset.init(1);
std::vector<uint32_t> rs1_segment_num_rows = {4};
rowid_conversion.init_segment_map(src_rowset, rs1_segment_num_rows);
st = rowid_conversion.init_segment_map(src_rowset, rs1_segment_num_rows);
EXPECT_EQ(st.ok(), true);
rowid_conversion.set_dst_rowset_id(dst_rowset);

std::vector<uint32_t> dst_segment_num_rows = {4, 3, 4};
Expand Down

0 comments on commit 745dff9

Please sign in to comment.