From d93c438b0a42a5d81f9ee999a101bfe38127e0f6 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Thu, 18 Dec 2025 11:37:59 +0800 Subject: [PATCH] [fix](olap) Should erase the segment footer cache after compaction (#59101) The cache should be erased after compaction. During compaction, segment data files are renamed. The renamed files may hit the footer cache of the previous files (since the cache key is composed of the file path and file size), so these caches need to be invalidated. Related PR: #xxx Problem Summary: None - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- be/src/olap/page_cache.h | 6 ++ be/src/olap/rowset/beta_rowset_writer.cpp | 31 ++++++++++ be/src/olap/rowset/beta_rowset_writer.h | 1 + .../olap/rowset/segment_v2/column_reader.cpp | 60 +++++++++++++++++++ be/src/olap/rowset/segment_v2/column_reader.h | 4 ++ be/src/olap/rowset/segment_v2/segment.cpp | 9 ++- be/src/olap/rowset/segment_v2/segment.h | 4 ++ 7 files changed, 113 insertions(+), 2 deletions(-) diff --git a/be/src/olap/page_cache.h b/be/src/olap/page_cache.h index 058e67cfdee2da..8752165e8b3f7b 100644 --- a/be/src/olap/page_cache.h +++ b/be/src/olap/page_cache.h @@ -186,6 +186,12 @@ class StoragePageCache { return _get_page_cache(page_type)->mem_tracker(); } + // Erase the page with key from this cache. + void erase(const CacheKey& key, segment_v2::PageTypePB page_type) { + auto* cache = _get_page_cache(page_type); + cache->erase(key.encode()); + } + private: StoragePageCache(); diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 0eea64eb0ee88c..8aa3b89e4e55b2 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -475,6 +475,7 @@ Status BetaRowsetWriter::_rename_compacted_segments(int64_t begin, int64_t end) "failed to rename {} to {}. ret:{}, errno:{}", src_seg_path, dst_seg_path, ret, errno); } + RETURN_IF_ERROR(_remove_segment_footer_cache(_num_segcompacted, dst_seg_path)); // rename inverted index files RETURN_IF_ERROR(_rename_compacted_indices(begin, end, 0)); @@ -518,6 +519,8 @@ Status BetaRowsetWriter::_rename_compacted_segment_plain(uint32_t seg_id) { "failed to rename {} to {}. ret:{}, errno:{}", src_seg_path, dst_seg_path, ret, errno); } + + RETURN_IF_ERROR(_remove_segment_footer_cache(_num_segcompacted, dst_seg_path)); // rename remaining inverted index files RETURN_IF_ERROR(_rename_compacted_indices(-1, -1, seg_id)); @@ -525,6 +528,34 @@ Status BetaRowsetWriter::_rename_compacted_segment_plain(uint32_t seg_id) { return Status::OK(); } +Status BetaRowsetWriter::_remove_segment_footer_cache(const uint32_t seg_id, + const std::string& segment_path) { + auto* footer_page_cache = ExecEnv::GetInstance()->get_storage_page_cache(); + if (!footer_page_cache) { + return Status::OK(); + } + + auto fs = _rowset_meta->fs(); + bool exists = false; + RETURN_IF_ERROR(fs->exists(segment_path, &exists)); + if (exists) { + io::FileReaderSPtr file_reader; + io::FileReaderOptions reader_options { + .cache_type = config::enable_file_cache ? io::FileCachePolicy::FILE_BLOCK_CACHE + : io::FileCachePolicy::NO_CACHE, + .is_doris_table = true, + .cache_base_path = "", + .file_size = _rowset_meta->segment_file_size(static_cast(seg_id)), + .tablet_id = _rowset_meta->tablet_id(), + }; + RETURN_IF_ERROR(fs->open_file(segment_path, &file_reader, &reader_options)); + DCHECK(file_reader != nullptr); + auto cache_key = segment_v2::Segment::get_segment_footer_cache_key(file_reader); + footer_page_cache->erase(cache_key, segment_v2::PageTypePB::INDEX_PAGE); + } + return Status::OK(); +} + Status BetaRowsetWriter::_rename_compacted_indices(int64_t begin, int64_t end, uint64_t seg_id) { int ret; diff --git a/be/src/olap/rowset/beta_rowset_writer.h b/be/src/olap/rowset/beta_rowset_writer.h index 78c39caf568789..87fd4e1b2705dd 100644 --- a/be/src/olap/rowset/beta_rowset_writer.h +++ b/be/src/olap/rowset/beta_rowset_writer.h @@ -301,6 +301,7 @@ class BetaRowsetWriter : public BaseBetaRowsetWriter { Status _rename_compacted_segments(int64_t begin, int64_t end); Status _rename_compacted_segment_plain(uint32_t seg_id); Status _rename_compacted_indices(int64_t begin, int64_t end, uint64_t seg_id); + Status _remove_segment_footer_cache(const uint32_t seg_id, const std::string& segment_path); void _clear_statistics_for_deleting_segments_unsafe(uint32_t begin, uint32_t end); StorageEngine& _engine; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 244c4b2e0cd552..94148d8d66bcae 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -82,6 +82,7 @@ #include "vec/core/types.h" #include "vec/data_types/data_type_agg_state.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" #include "vec/runtime/vdatetime_value.h" //for VecDateTime namespace doris::segment_v2 { @@ -310,6 +311,61 @@ int64_t ColumnReader::get_metadata_size() const { return sizeof(ColumnReader) + (_segment_zone_map ? _segment_zone_map->ByteSizeLong() : 0); } +#ifdef BE_TEST +/// This function is only used in UT to verify the correctness of data read from zone map +/// See UT case 'SegCompactionMoWTest.SegCompactionInterleaveWithBig_ooooOOoOooooooooO' +/// be/test/olap/segcompaction_mow_test.cpp +void ColumnReader::check_data_by_zone_map_for_test(const vectorized::MutableColumnPtr& dst) const { + if (!_segment_zone_map) { + return; + } + + const auto rows = dst->size(); + if (rows == 0) { + return; + } + + FieldType type = _type_info->type(); + + if (type != FieldType::OLAP_FIELD_TYPE_INT) { + return; + } + + auto* non_nullable_column = dst->is_nullable() + ? assert_cast(dst.get()) + ->get_nested_column_ptr() + .get() + : dst.get(); + + /// `PredicateColumnType` does not support `void get(size_t n, Field& res)`, + /// So here only check `CoumnVector` + if (vectorized::check_and_get_column>(non_nullable_column) == + nullptr) { + return; + } + + std::unique_ptr min_value(WrapperField::create_by_type(type, _meta_length)); + std::unique_ptr max_value(WrapperField::create_by_type(type, _meta_length)); + THROW_IF_ERROR(_parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get())); + + if (min_value->is_null() || max_value->is_null()) { + return; + } + + int32_t min_v = *reinterpret_cast(min_value->cell_ptr()); + int32_t max_v = *reinterpret_cast(max_value->cell_ptr()); + + for (size_t i = 0; i != rows; ++i) { + vectorized::Field field; + dst->get(i, field); + DCHECK(!field.is_null()); + const auto v = field.get(); + DCHECK_GE(v, min_v); + DCHECK_LE(v, max_v); + } +} +#endif + Status ColumnReader::init(const ColumnMetaPB* meta) { _type_info = get_type_info(meta); @@ -1350,6 +1406,10 @@ Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& d } *n -= remaining; _opts.stats->bytes_read += (dst->byte_size() - curr_size) + BitmapSize(*n); + +#ifdef BE_TEST + _reader->check_data_by_zone_map_for_test(dst); +#endif return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 28b35250baf899..22d3e9a6540ab0 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -239,6 +239,10 @@ class ColumnReader : public MetadataAdder, int64_t get_metadata_size() const override; +#ifdef BE_TEST + void check_data_by_zone_map_for_test(const vectorized::MutableColumnPtr& dst) const; +#endif + private: ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, uint64_t num_rows, io::FileReaderSPtr file_reader); diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 0181870501560e..ae9b3747cc4e6b 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -1068,8 +1068,13 @@ StoragePageCache::CacheKey Segment::get_segment_footer_cache_key() const { // The footer is always at the end of the segment file. // The size of footer is 12. // So we use the size of file minus 12 as the cache key, which is unique for each segment file. - return StoragePageCache::CacheKey(_file_reader->path().native(), _file_reader->size(), - _file_reader->size() - 12); + return get_segment_footer_cache_key(_file_reader); +} + +StoragePageCache::CacheKey Segment::get_segment_footer_cache_key( + const io::FileReaderSPtr& file_reader) { + return {file_reader->path().native(), file_reader->size(), + static_cast(file_reader->size() - 12)}; } } // namespace doris::segment_v2 diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 475e6819f89136..e4aa0de55c3938 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -31,6 +31,7 @@ #include "agent/be_exec_version_manager.h" #include "common/status.h" // Status +#include "io/fs/file_reader.h" #include "io/fs/file_reader_writer_fwd.h" #include "io/fs/file_system.h" #include "olap/field.h" @@ -211,6 +212,9 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd Status get_column_reader(int32_t col_uid, std::shared_ptr* column_reader, OlapReaderStatistics* stats); + static StoragePageCache::CacheKey get_segment_footer_cache_key( + const io::FileReaderSPtr& file_reader); + private: DISALLOW_COPY_AND_ASSIGN(Segment); Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema,