|
76 | 76 | #include "vec/core/types.h" |
77 | 77 | #include "vec/data_types/data_type_agg_state.h" |
78 | 78 | #include "vec/data_types/data_type_factory.hpp" |
| 79 | +#include "vec/data_types/data_type_nullable.h" |
79 | 80 | #include "vec/runtime/vdatetime_value.h" //for VecDateTime |
80 | 81 |
|
81 | 82 | namespace doris::segment_v2 { |
@@ -295,6 +296,61 @@ int64_t ColumnReader::get_metadata_size() const { |
295 | 296 | return sizeof(ColumnReader) + (_segment_zone_map ? _segment_zone_map->ByteSizeLong() : 0); |
296 | 297 | } |
297 | 298 |
|
| 299 | +#ifdef BE_TEST |
| 300 | +/// This function is only used in UT to verify the correctness of data read from zone map |
| 301 | +/// See UT case 'SegCompactionMoWTest.SegCompactionInterleaveWithBig_ooooOOoOooooooooO' |
| 302 | +/// be/test/olap/segcompaction_mow_test.cpp |
| 303 | +void ColumnReader::check_data_by_zone_map_for_test(const vectorized::MutableColumnPtr& dst) const { |
| 304 | + if (!_segment_zone_map) { |
| 305 | + return; |
| 306 | + } |
| 307 | + |
| 308 | + const auto rows = dst->size(); |
| 309 | + if (rows == 0) { |
| 310 | + return; |
| 311 | + } |
| 312 | + |
| 313 | + FieldType type = _type_info->type(); |
| 314 | + |
| 315 | + if (type != FieldType::OLAP_FIELD_TYPE_INT) { |
| 316 | + return; |
| 317 | + } |
| 318 | + |
| 319 | + auto* non_nullable_column = dst->is_nullable() |
| 320 | + ? assert_cast<vectorized::ColumnNullable*>(dst.get()) |
| 321 | + ->get_nested_column_ptr() |
| 322 | + .get() |
| 323 | + : dst.get(); |
| 324 | + |
| 325 | + /// `PredicateColumnType<TYPE_INT>` does not support `void get(size_t n, Field& res)`, |
| 326 | + /// So here only check `CoumnVector<TYPE_INT>` |
| 327 | + if (vectorized::check_and_get_column<vectorized::ColumnVector<TYPE_INT>>(non_nullable_column) == |
| 328 | + nullptr) { |
| 329 | + return; |
| 330 | + } |
| 331 | + |
| 332 | + std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type, _meta_length)); |
| 333 | + std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type, _meta_length)); |
| 334 | + THROW_IF_ERROR(_parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get())); |
| 335 | + |
| 336 | + if (min_value->is_null() || max_value->is_null()) { |
| 337 | + return; |
| 338 | + } |
| 339 | + |
| 340 | + int32_t min_v = *reinterpret_cast<int32_t*>(min_value->cell_ptr()); |
| 341 | + int32_t max_v = *reinterpret_cast<int32_t*>(max_value->cell_ptr()); |
| 342 | + |
| 343 | + for (size_t i = 0; i != rows; ++i) { |
| 344 | + vectorized::Field field; |
| 345 | + dst->get(i, field); |
| 346 | + DCHECK(!field.is_null()); |
| 347 | + const auto v = field.get<int32_t>(); |
| 348 | + DCHECK_GE(v, min_v); |
| 349 | + DCHECK_LE(v, max_v); |
| 350 | + } |
| 351 | +} |
| 352 | +#endif |
| 353 | + |
298 | 354 | Status ColumnReader::init(const ColumnMetaPB* meta) { |
299 | 355 | _type_info = get_type_info(meta); |
300 | 356 |
|
@@ -1349,6 +1405,10 @@ Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& d |
1349 | 1405 | } |
1350 | 1406 | *n -= remaining; |
1351 | 1407 | _opts.stats->bytes_read += (dst->byte_size() - curr_size) + BitmapSize(*n); |
| 1408 | + |
| 1409 | +#ifdef BE_TEST |
| 1410 | + _reader->check_data_by_zone_map_for_test(dst); |
| 1411 | +#endif |
1352 | 1412 | return Status::OK(); |
1353 | 1413 | } |
1354 | 1414 |
|
|
0 commit comments