Skip to content

Commit a85b117

Browse files
committed
[fix](olap) Should erase the segment footer cache after compaction
1 parent 99d14a8 commit a85b117

File tree

7 files changed

+105
-2
lines changed

7 files changed

+105
-2
lines changed

be/src/olap/page_cache.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,12 @@ class StoragePageCache {
186186
return _get_page_cache(page_type)->mem_tracker();
187187
}
188188

189+
// Erase the page with key from this cache.
190+
void erase(const CacheKey& key, segment_v2::PageTypePB page_type) {
191+
auto* cache = _get_page_cache(page_type);
192+
cache->erase(key.encode());
193+
}
194+
189195
private:
190196
StoragePageCache();
191197

be/src/olap/rowset/beta_rowset_writer.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,7 @@ Status BetaRowsetWriter::_rename_compacted_segments(int64_t begin, int64_t end)
536536
"failed to rename {} to {}. ret:{}, errno:{}", src_seg_path, dst_seg_path, ret,
537537
errno);
538538
}
539+
RETURN_IF_ERROR(_remove_segment_footer_cache(dst_seg_path));
539540

540541
// rename inverted index files
541542
RETURN_IF_ERROR(_rename_compacted_indices(begin, end, 0));
@@ -579,13 +580,35 @@ Status BetaRowsetWriter::_rename_compacted_segment_plain(uint32_t seg_id) {
579580
"failed to rename {} to {}. ret:{}, errno:{}", src_seg_path, dst_seg_path, ret,
580581
errno);
581582
}
583+
584+
RETURN_IF_ERROR(_remove_segment_footer_cache(dst_seg_path));
582585
// rename remaining inverted index files
583586
RETURN_IF_ERROR(_rename_compacted_indices(-1, -1, seg_id));
584587

585588
++_num_segcompacted;
586589
return Status::OK();
587590
}
588591

592+
Status BetaRowsetWriter::_remove_segment_footer_cache(const std::string& segment_path) {
593+
auto* footer_page_cache = ExecEnv::GetInstance()->get_storage_page_cache();
594+
if (!footer_page_cache) {
595+
return Status::OK();
596+
}
597+
598+
auto fs = _rowset_meta->fs();
599+
bool exists = false;
600+
RETURN_IF_ERROR(fs->exists(segment_path, &exists));
601+
if (exists) {
602+
io::FileReaderSPtr file_reader;
603+
io::FileReaderOptions options;
604+
RETURN_IF_ERROR(fs->open_file(segment_path, &file_reader, &options));
605+
DCHECK(file_reader != nullptr);
606+
auto cache_key = segment_v2::Segment::get_segment_footer_cache_key(file_reader);
607+
footer_page_cache->erase(cache_key, segment_v2::PageTypePB::INDEX_PAGE);
608+
}
609+
return Status::OK();
610+
}
611+
589612
Status BetaRowsetWriter::_rename_compacted_indices(int64_t begin, int64_t end, uint64_t seg_id) {
590613
int ret;
591614

be/src/olap/rowset/beta_rowset_writer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ class BetaRowsetWriter : public BaseBetaRowsetWriter {
302302
Status _rename_compacted_segments(int64_t begin, int64_t end);
303303
Status _rename_compacted_segment_plain(uint32_t seg_id);
304304
Status _rename_compacted_indices(int64_t begin, int64_t end, uint64_t seg_id);
305+
Status _remove_segment_footer_cache(const std::string& segment_path);
305306
void _clear_statistics_for_deleting_segments_unsafe(uint32_t begin, uint32_t end);
306307

307308
StorageEngine& _engine;

be/src/olap/rowset/segment_v2/column_reader.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
#include "vec/core/types.h"
7878
#include "vec/data_types/data_type_agg_state.h"
7979
#include "vec/data_types/data_type_factory.hpp"
80+
#include "vec/data_types/data_type_nullable.h"
8081
#include "vec/runtime/vdatetime_value.h" //for VecDateTime
8182

8283
namespace doris::segment_v2 {
@@ -296,6 +297,61 @@ int64_t ColumnReader::get_metadata_size() const {
296297
return sizeof(ColumnReader) + (_segment_zone_map ? _segment_zone_map->ByteSizeLong() : 0);
297298
}
298299

300+
#ifdef BE_TEST
301+
/// This function is only used in UT to verify the correctness of data read from zone map
302+
/// See UT case 'SegCompactionMoWTest.SegCompactionInterleaveWithBig_ooooOOoOooooooooO'
303+
/// be/test/olap/segcompaction_mow_test.cpp
304+
void ColumnReader::check_data_by_zone_map_for_test(const vectorized::MutableColumnPtr& dst) const {
305+
if (!_segment_zone_map) {
306+
return;
307+
}
308+
309+
const auto rows = dst->size();
310+
if (rows == 0) {
311+
return;
312+
}
313+
314+
FieldType type = _type_info->type();
315+
316+
if (type != FieldType::OLAP_FIELD_TYPE_INT) {
317+
return;
318+
}
319+
320+
auto* non_nullable_column = dst->is_nullable()
321+
? assert_cast<vectorized::ColumnNullable*>(dst.get())
322+
->get_nested_column_ptr()
323+
.get()
324+
: dst.get();
325+
326+
/// `PredicateColumnType<TYPE_INT>` does not support `void get(size_t n, Field& res)`,
327+
/// So here only check `CoumnVector<TYPE_INT>`
328+
if (vectorized::check_and_get_column<vectorized::ColumnVector<TYPE_INT>>(non_nullable_column) ==
329+
nullptr) {
330+
return;
331+
}
332+
333+
std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length));
334+
std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length));
335+
THROW_IF_ERROR(_parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get()));
336+
337+
if (min_value->is_null() || max_value->is_null()) {
338+
return;
339+
}
340+
341+
int32_t min_v = *reinterpret_cast<int32_t*>(min_value->cell_ptr());
342+
int32_t max_v = *reinterpret_cast<int32_t*>(max_value->cell_ptr());
343+
344+
for (size_t i = 0; i != rows; ++i) {
345+
vectorized::Field field;
346+
dst->get(i, field);
347+
DCHECK(!field.is_null());
348+
const auto v = field.get<int32_t>();
349+
DCHECK_GE(v, min_v);
350+
DCHECK_LE(v, max_v);
351+
}
352+
}
353+
#endif
354+
299355
Status ColumnReader::init(const ColumnMetaPB* meta) {
300356
_type_info = get_type_info(meta);
301357

@@ -1814,6 +1870,10 @@ Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& d
18141870
}
18151871
*n -= remaining;
18161872
_opts.stats->bytes_read += (dst->byte_size() - curr_size) + BitmapSize(*n);
1873+
1874+
#ifdef BE_TEST
1875+
_reader->check_data_by_zone_map_for_test(dst);
1876+
#endif
18171877
return Status::OK();
18181878
}
18191879

be/src/olap/rowset/segment_v2/column_reader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,10 @@ class ColumnReader : public MetadataAdder<ColumnReader>,
231231

232232
int64_t get_metadata_size() const override;
233233

234+
#ifdef BE_TEST
235+
void check_data_by_zone_map_for_test(const vectorized::MutableColumnPtr& dst) const;
236+
#endif
237+
234238
private:
235239
friend class VariantColumnReader;
236240

be/src/olap/rowset/segment_v2/segment.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,8 +1019,13 @@ StoragePageCache::CacheKey Segment::get_segment_footer_cache_key() const {
10191019
// The footer is always at the end of the segment file.
10201020
// The size of footer is 12.
10211021
// So we use the size of file minus 12 as the cache key, which is unique for each segment file.
1022-
return StoragePageCache::CacheKey(_file_reader->path().native(), _file_reader->size(),
1023-
_file_reader->size() - 12);
1022+
return get_segment_footer_cache_key(_file_reader);
1023+
}
1024+
1025+
StoragePageCache::CacheKey Segment::get_segment_footer_cache_key(
1026+
const io::FileReaderSPtr& file_reader) {
1027+
return {file_reader->path().native(), file_reader->size(),
1028+
static_cast<int64_t>(file_reader->size() - 12)};
10241029
}
10251030

10261031
#include "common/compile_check_end.h"

be/src/olap/rowset/segment_v2/segment.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#include "agent/be_exec_version_manager.h"
3333
#include "common/status.h" // Status
34+
#include "io/fs/file_reader.h"
3435
#include "io/fs/file_reader_writer_fwd.h"
3536
#include "io/fs/file_system.h"
3637
#include "olap/field.h"
@@ -207,6 +208,9 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
207208

208209
Status traverse_column_meta_pbs(const std::function<void(const ColumnMetaPB&)>& visitor);
209210

211+
static StoragePageCache::CacheKey get_segment_footer_cache_key(
212+
const io::FileReaderSPtr& file_reader);
213+
210214
private:
211215
DISALLOW_COPY_AND_ASSIGN(Segment);
212216
Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema,

0 commit comments

Comments
 (0)