-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[feat] Virtual Slot Ref #52701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[feat] Virtual Slot Ref #52701
Changes from all commits
11d3c86
3cacd82
5614240
056b84f
6a8ce20
c24a9ce
d55a524
8b52452
3b386d9
af5fc37
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,168 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| #include "virtual_column_iterator.h" | ||
|
|
||
| #include <cstddef> | ||
| #include <cstring> | ||
| #include <memory> | ||
|
|
||
| #include "vec/columns/column.h" | ||
| #include "vec/columns/column_nothing.h" | ||
|
|
||
zhiqiang-hhhh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| namespace doris::segment_v2 { | ||
|
|
||
| VirtualColumnIterator::VirtualColumnIterator() | ||
| : _materialized_column_ptr(vectorized::ColumnNothing::create(0)) {} | ||
|
|
||
| // Init implementation | ||
| Status VirtualColumnIterator::init(const ColumnIteratorOptions& opts) { | ||
| // Virtual column doesn't need special initialization | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| void VirtualColumnIterator::prepare_materialization(vectorized::IColumn::Ptr column, | ||
| std::unique_ptr<std::vector<uint64_t>> labels) { | ||
| DCHECK(labels->size() == column->size()) << "labels size: " << labels->size() | ||
| << ", materialized column size: " << column->size(); | ||
| // 1. do sort to labels | ||
| // column: [100, 101, 102, 99, 50, 49] | ||
| // lables: [5, 4, 1, 10, 7, 2] | ||
| const std::vector<uint64_t>& labels_ref = *labels; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. std::vector<std::pair<int, int>> labels_order; sort first <1,2> <2,5> <4,1> ... <10,3>
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
applied |
||
| const size_t n = labels_ref.size(); | ||
| VLOG_DEBUG << fmt::format("Input labels {}", fmt::join(labels_ref, ", ")); | ||
| if (n == 0) { | ||
| _size = 0; | ||
| _max_ordinal = 0; | ||
| return; | ||
| } | ||
| std::vector<std::pair<size_t, size_t>> order(n); | ||
| // {5:0, 4:1, 1:2, 10:3, 7:4, 2:5} | ||
| for (size_t i = 0; i < n; ++i) { | ||
| order[i] = {labels_ref[i], i}; | ||
| } | ||
| // Sort by labels, so we can scatter the column by global row id. | ||
| // After sort, order will be: | ||
| // order: {1-2, 2-5, 4-1, 5-0, 7-4, 10-3} | ||
| std::sort(order.begin(), order.end(), | ||
| [&](const auto& a, const auto& b) { return a.first < b.first; }); | ||
| _max_ordinal = order[n - 1].first; | ||
| // 2. scatter column | ||
| auto scattered_column = column->clone_empty(); | ||
| // We need a mapping from global row id to local index in the materialized column. | ||
| _row_id_to_idx.clear(); | ||
| for (size_t i = 0; i < n; ++i) { | ||
| size_t global_idx = order[i].first; // global row id | ||
| size_t original_col_idx = order[i].second; // original index in the column | ||
| _row_id_to_idx[global_idx] = i; | ||
| scattered_column->insert_from(*column, original_col_idx); | ||
| } | ||
|
|
||
| // After scatter: | ||
| // scattered_column: [102, 49, 101, 100, 50, 99] | ||
| // _row_id_to_idx: {1:0, 2:1, 4:2, 5:3, 7:4, 10:5} | ||
| _materialized_column_ptr = std::move(scattered_column); | ||
|
|
||
| _size = n; | ||
|
|
||
| std::string msg; | ||
| for (const auto& pair : _row_id_to_idx) { | ||
| msg += fmt::format("{}: {}, ", pair.first, pair.second); | ||
| } | ||
|
|
||
| VLOG_DEBUG << fmt::format("virtual column iterator, row_idx_to_idx:\n{}", msg); | ||
| _filter = doris::vectorized::IColumn::Filter(_size, 0); | ||
| } | ||
|
|
||
| Status VirtualColumnIterator::seek_to_ordinal(ordinal_t ord_idx) { | ||
| if (_size == 0 || | ||
| vectorized::check_and_get_column<vectorized::ColumnNothing>(*_materialized_column_ptr)) { | ||
| // _materialized_column is not set. do nothing. | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| if (ord_idx >= _max_ordinal) { | ||
| return Status::InternalError("Seek to ordinal out of range: {} out of {}", ord_idx, | ||
| _max_ordinal); | ||
| } | ||
|
|
||
| _current_ordinal = ord_idx; | ||
|
|
||
| return Status::OK(); | ||
| } | ||
|
|
||
| // Next batch implementation | ||
| Status VirtualColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, | ||
| bool* has_null) { | ||
| size_t rows_num_to_read = *n; | ||
| if (rows_num_to_read == 0 || | ||
| vectorized::check_and_get_column<vectorized::ColumnNothing>(*_materialized_column_ptr)) { | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| if (_row_id_to_idx.find(_current_ordinal) == _row_id_to_idx.end()) { | ||
| return Status::InternalError("Current ordinal {} not found in row_id_to_idx map", | ||
| _current_ordinal); | ||
| } | ||
|
|
||
| // Update dst column | ||
| if (vectorized::check_and_get_column<vectorized::ColumnNothing>(*dst)) { | ||
| VLOG_DEBUG << fmt::format("Dst is nothing column, create new mutable column"); | ||
| dst = _materialized_column_ptr->clone_empty(); | ||
| } | ||
|
|
||
| size_t start = _row_id_to_idx[_current_ordinal]; | ||
| dst->insert_range_from(*_materialized_column_ptr, start, rows_num_to_read); | ||
|
|
||
| VLOG_DEBUG << fmt::format("Virtual column iterators, next_batch, rows reads: {}, dst size: {}", | ||
| rows_num_to_read, dst->size()); | ||
|
|
||
| _current_ordinal += rows_num_to_read; | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| Status VirtualColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, | ||
| vectorized::MutableColumnPtr& dst) { | ||
| if (count == 0 || | ||
| vectorized::check_and_get_column<vectorized::ColumnNothing>(*_materialized_column_ptr)) { | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| memset(_filter.data(), 0, _size); | ||
|
|
||
| // Convert rowids to filter | ||
| for (size_t i = 0; i < count; ++i) { | ||
| _filter[_row_id_to_idx[rowids[i]]] = 1; | ||
| } | ||
|
|
||
| // Apply filter to materialized column | ||
| doris::vectorized::IColumn::Ptr res_col = _materialized_column_ptr->filter(_filter, 0); | ||
| // Update dst column | ||
| if (vectorized::check_and_get_column<vectorized::ColumnNothing>(*dst)) { | ||
| VLOG_DEBUG << fmt::format("Dst is nothing column, create new mutable column"); | ||
| dst = res_col->assume_mutable(); | ||
| } else { | ||
| dst->insert_range_from(*res_col, 0, res_col->size()); | ||
| } | ||
|
|
||
| VLOG_DEBUG << fmt::format( | ||
| "Virtual column iterators, read_by_rowids, rowids size: {}, dst size: {}", count, | ||
| dst->size()); | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| } // namespace doris::segment_v2 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <sys/types.h> | ||
|
|
||
| #include <cstdint> | ||
| #include <unordered_map> | ||
|
|
||
| #include "column_reader.h" | ||
| #include "common/be_mock_util.h" | ||
| #include "vec/columns/column.h" | ||
|
|
||
| namespace doris::segment_v2 { | ||
|
|
||
| class VirtualColumnIterator : public ColumnIterator { | ||
| public: | ||
| VirtualColumnIterator(); | ||
| ~VirtualColumnIterator() override = default; | ||
|
|
||
| MOCK_FUNCTION void prepare_materialization(vectorized::IColumn::Ptr column, | ||
| std::unique_ptr<std::vector<uint64_t>> labels); | ||
|
|
||
| Status init(const ColumnIteratorOptions& opts) override; | ||
|
|
||
| Status seek_to_ordinal(ordinal_t ord_idx) override; | ||
|
|
||
| Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; | ||
|
|
||
| Status read_by_rowids(const rowid_t* rowids, const size_t count, | ||
| vectorized::MutableColumnPtr& dst) override; | ||
|
|
||
| ordinal_t get_current_ordinal() const override { return 0; } | ||
|
|
||
| #ifdef BE_TEST | ||
| vectorized::IColumn::Ptr get_materialized_column() const { return _materialized_column_ptr; } | ||
| const std::unordered_map<size_t, size_t>& get_row_id_to_idx() const { return _row_id_to_idx; } | ||
| #endif | ||
| private: | ||
| vectorized::IColumn::Ptr _materialized_column_ptr; | ||
| // segment rowid to index in column. | ||
| std::unordered_map<size_t, size_t> _row_id_to_idx; | ||
| doris::vectorized::IColumn::Filter _filter; | ||
| size_t _size = 0; | ||
| size_t _max_ordinal = 0; | ||
| ordinal_t _current_ordinal = 0; | ||
| }; | ||
|
|
||
| } // namespace doris::segment_v2 |
Uh oh!
There was an error while loading. Please reload this page.