|
77 | 77 | #include "vec/core/types.h" |
78 | 78 | #include "vec/data_types/data_type_agg_state.h" |
79 | 79 | #include "vec/data_types/data_type_factory.hpp" |
| 80 | +#include "vec/data_types/data_type_nullable.h" |
80 | 81 | #include "vec/runtime/vdatetime_value.h" //for VecDateTime |
81 | 82 |
|
82 | 83 | namespace doris::segment_v2 { |
@@ -296,6 +297,61 @@ int64_t ColumnReader::get_metadata_size() const { |
296 | 297 | return sizeof(ColumnReader) + (_segment_zone_map ? _segment_zone_map->ByteSizeLong() : 0); |
297 | 298 | } |
298 | 299 |
|
#ifdef BE_TEST
/// Test-only consistency check between freshly read data and the segment zone map.
///
/// Every row of `dst` is expected to be non-null and to lie inside the
/// [min, max] interval parsed from `_segment_zone_map`; a violation trips a DCHECK.
/// The function returns without checking anything unless all of these hold:
///   - a segment zone map is present and `dst` has at least one row,
///   - the column's field type is `OLAP_FIELD_TYPE_INT`,
///   - the column (after looking through a nullable wrapper) is a
///     `ColumnVector<TYPE_INT>`,
///   - both parsed zone map bounds are non-null.
/// A failure to parse the zone map is propagated through `THROW_IF_ERROR`.
///
/// Exercised by UT case
/// 'SegCompactionMoWTest.SegCompactionInterleaveWithBig_ooooOOoOooooooooO'
/// in be/test/olap/segcompaction_mow_test.cpp.
void ColumnReader::check_data_by_zone_map_for_test(const vectorized::MutableColumnPtr& dst) const {
    // Nothing to compare against, or nothing to compare.
    if (!_segment_zone_map || dst->size() == 0) {
        return;
    }

    const FieldType field_type = _type_info->type();
    if (field_type != FieldType::OLAP_FIELD_TYPE_INT) {
        return;
    }

    // Look through a nullable wrapper so the concrete column type can be inspected.
    auto* nested_column =
            dst->is_nullable() ? assert_cast<vectorized::ColumnNullable*>(dst.get())
                                         ->get_nested_column_ptr()
                                         .get()
                               : dst.get();

    /// `PredicateColumnType<TYPE_INT>` does not support `void get(size_t n, Field& res)`,
    /// so only `ColumnVector<TYPE_INT>` is checked here.
    const bool is_int_vector =
            vectorized::check_and_get_column<vectorized::ColumnVector<TYPE_INT>>(nested_column) !=
            nullptr;
    if (!is_int_vector) {
        return;
    }

    std::unique_ptr<WrapperField> zone_min(WrapperField::create_by_type(field_type, _meta_length));
    std::unique_ptr<WrapperField> zone_max(WrapperField::create_by_type(field_type, _meta_length));
    THROW_IF_ERROR(_parse_zone_map(*_segment_zone_map, zone_min.get(), zone_max.get()));

    // Without both bounds there is no range to validate against.
    if (zone_min->is_null() || zone_max->is_null()) {
        return;
    }

    // The names `field`, `v`, `min_v` and `max_v` are kept on purpose: the DCHECK
    // macros below embed their operand text in the failure message.
    const int32_t min_v = *reinterpret_cast<int32_t*>(zone_min->cell_ptr());
    const int32_t max_v = *reinterpret_cast<int32_t*>(zone_max->cell_ptr());

    const auto row_count = dst->size();
    for (size_t row = 0; row < row_count; ++row) {
        vectorized::Field field;
        dst->get(row, field);
        DCHECK(!field.is_null());
        const auto v = field.get<int32_t>();
        DCHECK_GE(v, min_v);
        DCHECK_LE(v, max_v);
    }
}
#endif
| 354 | + |
299 | 355 | Status ColumnReader::init(const ColumnMetaPB* meta) { |
300 | 356 | _type_info = get_type_info(meta); |
301 | 357 |
|
@@ -1814,6 +1870,10 @@ Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& d |
1814 | 1870 | } |
1815 | 1871 | *n -= remaining; |
1816 | 1872 | _opts.stats->bytes_read += (dst->byte_size() - curr_size) + BitmapSize(*n); |
| 1873 | + |
| 1874 | +#ifdef BE_TEST |
| 1875 | + _reader->check_data_by_zone_map_for_test(dst); |
| 1876 | +#endif |
1817 | 1877 | return Status::OK(); |
1818 | 1878 | } |
1819 | 1879 |
|
|
0 commit comments