Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions be/src/olap/rowset/segment_v2/row_ranges.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,13 +225,15 @@ class RowRanges {
return _ranges[_ranges.size() - 1].to();
}

// Returns the number of ranges stored in _ranges (non-const form).
size_t range_size() { return _ranges.size(); }
// Returns the number of ranges stored in _ranges; callable on a const object.
size_t range_size() const { return _ranges.size(); }

// Returns from() of the range at `range_index` (non-const form).
// The index is passed straight to _ranges[...]; nothing here validates it.
int64_t get_range_from(size_t range_index) { return _ranges[range_index].from(); }
// Returns the range at `index` by value, i.e. as a copy of the stored element.
// The index is passed straight to _ranges[...]; nothing here validates it.
RowRange get_range(size_t index) const { return _ranges[index]; }

// Returns to() of the range at `range_index` (non-const form).
int64_t get_range_to(size_t range_index) { return _ranges[range_index].to(); }
// Returns from() of the range at `range_index`; callable on a const object.
int64_t get_range_from(size_t range_index) const { return _ranges[range_index].from(); }

// Returns count() of the range at `range_index` (non-const form).
size_t get_range_count(size_t range_index) { return _ranges[range_index].count(); }
// Returns to() of the range at `range_index`; callable on a const object.
int64_t get_range_to(size_t range_index) const { return _ranges[range_index].to(); }

// Returns count() of the range at `range_index`; callable on a const object.
size_t get_range_count(size_t range_index) const { return _ranges[range_index].count(); }

std::string to_string() {
std::string result;
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exec/format/parquet/level_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ doris::Status doris::vectorized::LevelDecoder::init_v2(const doris::Slice& level
}

size_t doris::vectorized::LevelDecoder::get_levels(doris::vectorized::level_t* levels, size_t n) {
// TODO: template.
if (_encoding == tparquet::Encoding::RLE) {
n = std::min((size_t)_num_levels, n);
auto num_decoded = _rle_decoder.get_values(levels, n);
Expand Down
101 changes: 101 additions & 0 deletions be/src/vec/exec/format/parquet/parquet_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,107 @@ Status FilterMap::generate_nested_filter_map(const std::vector<level_t>& rep_lev
return Status::OK();
}

// Prepares the selector for a batch of `num_values` values.
//
// `run_length_null_map` is read as alternating run lengths: the first run is
// treated as non-null and every following run flips the null state.
//
// Without a filter (filter_map->has_filter() is false):
//   - the address of `run_length_null_map` is kept in _run_length_null_map
//     (NOTE(review): presumably the caller's vector must stay alive while this
//     object reads it -- confirm against the readers),
//   - if `null_map` is non-null it grows by `num_values` bytes, written as 1
//     for null runs and 0 for non-null runs,
//   - _num_nulls accumulates the lengths of the null runs.
//
// With a filter:
//   - _data_map gets one tag per value (FILTERED_NULL / FILTERED_CONTENT),
//   - values whose filter entry is set are re-tagged NULL_DATA / CONTENT,
//   - filter positions contained in `skipped_indices` are stepped over without
//     consuming a value,
//   - if `null_map` is non-null and at least one value is kept, it grows by one
//     byte per kept value.
//
// Always returns Status::OK().
Status ColumnSelectVector::init(const std::vector<uint16_t>& run_length_null_map, size_t num_values,
                                NullMap* null_map, FilterMap* filter_map, size_t filter_map_index,
                                const std::unordered_set<size_t>* skipped_indices) {
    _num_values = num_values;
    _num_nulls = 0;
    _read_index = 0;
    _has_filter = filter_map->has_filter();

    // Null state of the run currently being processed; starts as non-null.
    bool in_null_run = false;

    // ---- Path 1: nothing is filtered ----
    if (!filter_map->has_filter()) {
        _num_filtered = 0;
        _run_length_null_map = &run_length_null_map;

        if (null_map == nullptr) {
            // No null map requested: only the null count is needed.
            for (const auto& run : run_length_null_map) {
                if (in_null_run) {
                    _num_nulls += run;
                }
                in_null_run = !in_null_run;
            }
            return Status::OK();
        }

        NullMap& nulls_out = *null_map;
        auto write_pos = nulls_out.size();
        nulls_out.resize(write_pos + num_values);

        for (const auto& run : run_length_null_map) {
            memset(nulls_out.data() + write_pos, in_null_run ? 1 : 0, run);
            write_pos += run;
            if (in_null_run) {
                _num_nulls += run;
            }
            in_null_run = !in_null_run;
        }
        return Status::OK();
    }

    // ---- Path 2: a filter is present ----
    // No run length null map is generated when _filter_all = true
    // DCHECK(!filter_map->filter_all());
    _data_map.resize(num_values);

    // Step 1: expand the runs into per-value tags, all initially "filtered".
    size_t map_index = 0;
    for (const auto& run : run_length_null_map) {
        const auto tag = in_null_run ? FILTERED_NULL : FILTERED_CONTENT;
        if (in_null_run) {
            _num_nulls += run;
        }
        const size_t run_end = map_index + run;
        while (map_index < run_end) {
            _data_map[map_index++] = tag;
        }
        in_null_run = !in_null_run;
    }

    // Step 2: walk the filter map and promote the values that are kept.
    size_t num_read = 0;
    size_t value_pos = 0;
    size_t i = 0;
    while (value_pos < num_values) {
        DCHECK_LT(filter_map_index + i, filter_map->filter_map_size());
        const size_t filter_pos = filter_map_index + i;
        ++i;

        const bool is_skipped =
                skipped_indices != nullptr && skipped_indices->count(filter_pos) > 0;
        if (is_skipped) {
            // Skipped filter entries do not correspond to a value in this batch.
            continue;
        }

        if (filter_map->filter_map_data()[filter_pos]) {
            if (_data_map[value_pos] == FILTERED_NULL) {
                _data_map[value_pos] = NULL_DATA;
            } else {
                _data_map[value_pos] = CONTENT;
            }
            ++num_read;
        }
        ++value_pos;
    }

    _num_filtered = num_values - num_read;

    // Step 3: append null flags for the kept values, if a null map was given.
    if (null_map == nullptr || num_read == 0) {
        return Status::OK();
    }

    NullMap& nulls_out = *null_map;
    auto write_pos = nulls_out.size();
    nulls_out.resize(write_pos + num_read);

    const bool no_nulls = _num_nulls == 0;
    const bool only_nulls = _num_nulls == num_values;
    if (no_nulls || only_nulls) {
        // Uniform batch: every kept value has the same flag.
        memset(nulls_out.data() + write_pos, no_nulls ? 0 : 1, num_read);
        return Status::OK();
    }

    for (size_t pos = 0; pos < num_values; ++pos) {
        const auto state = _data_map[pos];
        if (state != CONTENT && state != NULL_DATA) {
            continue; // still filtered, contributes nothing
        }
        nulls_out[write_pos++] = static_cast<UInt8>(state == NULL_DATA);
    }
    return Status::OK();
}

ParsedVersion::ParsedVersion(std::string application, std::optional<std::string> version,
std::optional<std::string> app_build_hash)
: _application(std::move(application)),
Expand Down
100 changes: 1 addition & 99 deletions be/src/vec/exec/format/parquet/parquet_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,105 +104,7 @@ class ColumnSelectVector {

// Prepares the selector for a batch of `num_values` values.
//
// `run_length_null_map` is read as alternating run lengths: the first run is
// treated as non-null and every following run flips the null state.
//
// Without a filter (filter_map->has_filter() is false) the address of
// `run_length_null_map` is kept in _run_length_null_map, `null_map` (if
// non-null) grows by `num_values` bytes, and _num_nulls accumulates the
// lengths of the null runs.
//
// With a filter, _data_map receives one tag per value; values whose filter
// entry is set become NULL_DATA / CONTENT, filter positions contained in
// `skipped_indices` are stepped over without consuming a value, and
// `null_map` (if non-null and at least one value is kept) grows by one byte
// per kept value.
//
// Always returns Status::OK().
Status init(const std::vector<uint16_t>& run_length_null_map, size_t num_values,
            NullMap* null_map, FilterMap* filter_map, size_t filter_map_index,
            const std::unordered_set<size_t>* skipped_indices = nullptr) {
    _num_values = num_values;
    _num_nulls = 0;
    _read_index = 0;
    _has_filter = filter_map->has_filter();

    // Null state of the run currently being processed; starts as non-null.
    bool in_null_run = false;

    // ---- Path 1: nothing is filtered ----
    if (!filter_map->has_filter()) {
        _num_filtered = 0;
        _run_length_null_map = &run_length_null_map;

        if (null_map == nullptr) {
            // No null map requested: only the null count is needed.
            for (const auto& run : run_length_null_map) {
                if (in_null_run) {
                    _num_nulls += run;
                }
                in_null_run = !in_null_run;
            }
            return Status::OK();
        }

        NullMap& nulls_out = *null_map;
        auto write_pos = nulls_out.size();
        nulls_out.resize(write_pos + num_values);

        for (const auto& run : run_length_null_map) {
            memset(nulls_out.data() + write_pos, in_null_run ? 1 : 0, run);
            write_pos += run;
            if (in_null_run) {
                _num_nulls += run;
            }
            in_null_run = !in_null_run;
        }
        return Status::OK();
    }

    // ---- Path 2: a filter is present ----
    // No run length null map is generated when _filter_all = true
    DCHECK(!filter_map->filter_all());
    _data_map.resize(num_values);

    // Step 1: expand the runs into per-value tags, all initially "filtered".
    size_t map_index = 0;
    for (const auto& run : run_length_null_map) {
        const auto tag = in_null_run ? FILTERED_NULL : FILTERED_CONTENT;
        if (in_null_run) {
            _num_nulls += run;
        }
        const size_t run_end = map_index + run;
        while (map_index < run_end) {
            _data_map[map_index++] = tag;
        }
        in_null_run = !in_null_run;
    }

    // Step 2: walk the filter map and promote the values that are kept.
    size_t num_read = 0;
    size_t value_pos = 0;
    size_t i = 0;
    while (value_pos < num_values) {
        DCHECK_LT(filter_map_index + i, filter_map->filter_map_size());
        const size_t filter_pos = filter_map_index + i;
        ++i;

        const bool is_skipped =
                skipped_indices != nullptr && skipped_indices->count(filter_pos) > 0;
        if (is_skipped) {
            // Skipped filter entries do not correspond to a value in this batch.
            continue;
        }

        if (filter_map->filter_map_data()[filter_pos]) {
            if (_data_map[value_pos] == FILTERED_NULL) {
                _data_map[value_pos] = NULL_DATA;
            } else {
                _data_map[value_pos] = CONTENT;
            }
            ++num_read;
        }
        ++value_pos;
    }

    _num_filtered = num_values - num_read;

    // Step 3: append null flags for the kept values, if a null map was given.
    if (null_map == nullptr || num_read == 0) {
        return Status::OK();
    }

    NullMap& nulls_out = *null_map;
    auto write_pos = nulls_out.size();
    nulls_out.resize(write_pos + num_read);

    const bool no_nulls = _num_nulls == 0;
    const bool only_nulls = _num_nulls == num_values;
    if (no_nulls || only_nulls) {
        // Uniform batch: every kept value has the same flag.
        memset(nulls_out.data() + write_pos, no_nulls ? 0 : 1, num_read);
        return Status::OK();
    }

    for (size_t pos = 0; pos < num_values; ++pos) {
        const auto state = _data_map[pos];
        if (state != CONTENT && state != NULL_DATA) {
            continue; // still filtered, contributes nothing
        }
        nulls_out[write_pos++] = static_cast<UInt8>(state == NULL_DATA);
    }
    return Status::OK();
}
const std::unordered_set<size_t>* skipped_indices = nullptr);

size_t num_values() const { return _num_values; }

Expand Down
Loading
Loading