Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[refactor](agg) Refactor agg-related metrics #42898

Merged
merged 1 commit into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions be/src/pipeline/exec/aggregation_sink_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,13 @@ Status AggSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) {
Base::profile(), "MemoryUsageSerializeKeyArena", TUnit::BYTES, 1);

_build_timer = ADD_TIMER(Base::profile(), "BuildTime");
_serialize_key_timer = ADD_TIMER(Base::profile(), "SerializeKeyTime");
_exec_timer = ADD_TIMER(Base::profile(), "ExecTime");
_merge_timer = ADD_TIMER(Base::profile(), "MergeTime");
_expr_timer = ADD_TIMER(Base::profile(), "ExprTime");
_serialize_data_timer = ADD_TIMER(Base::profile(), "SerializeDataTime");
_deserialize_data_timer = ADD_TIMER(Base::profile(), "DeserializeAndMergeTime");
_hash_table_compute_timer = ADD_TIMER(Base::profile(), "HashTableComputeTime");
_hash_table_limit_compute_timer = ADD_TIMER(Base::profile(), "DoLimitComputeTime");
_hash_table_emplace_timer = ADD_TIMER(Base::profile(), "HashTableEmplaceTime");
_hash_table_input_counter = ADD_COUNTER(Base::profile(), "HashTableInputCount", TUnit::UNIT);
_max_row_size_counter = ADD_COUNTER(Base::profile(), "MaxRowSizeInBytes", TUnit::UNIT);

return Status::OK();
}
Expand Down
3 changes: 0 additions & 3 deletions be/src/pipeline/exec/aggregation_sink_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,8 @@ class AggSinkLocalState : public PipelineXSinkLocalState<AggSharedState> {
RuntimeProfile::Counter* _hash_table_input_counter = nullptr;
RuntimeProfile::Counter* _build_timer = nullptr;
RuntimeProfile::Counter* _expr_timer = nullptr;
RuntimeProfile::Counter* _serialize_key_timer = nullptr;
RuntimeProfile::Counter* _merge_timer = nullptr;
RuntimeProfile::Counter* _serialize_data_timer = nullptr;
RuntimeProfile::Counter* _deserialize_data_timer = nullptr;
RuntimeProfile::Counter* _max_row_size_counter = nullptr;
RuntimeProfile::Counter* _hash_table_memory_usage = nullptr;
RuntimeProfile::Counter* _hash_table_size_counter = nullptr;
RuntimeProfile::Counter* _serialize_key_arena_memory_usage = nullptr;
Expand Down
47 changes: 12 additions & 35 deletions be/src/pipeline/exec/aggregation_source_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,18 @@ namespace doris::pipeline {
AggLocalState::AggLocalState(RuntimeState* state, OperatorXBase* parent)
: Base(state, parent),
_get_results_timer(nullptr),
_serialize_result_timer(nullptr),
_hash_table_iterate_timer(nullptr),
_insert_keys_to_column_timer(nullptr),
_serialize_data_timer(nullptr) {}
_insert_values_to_column_timer(nullptr) {}
yiguolei marked this conversation as resolved.
Show resolved Hide resolved

Status AggLocalState::init(RuntimeState* state, LocalStateInfo& info) {
RETURN_IF_ERROR(Base::init(state, info));
SCOPED_TIMER(exec_time_counter());
SCOPED_TIMER(_init_timer);
_get_results_timer = ADD_TIMER(profile(), "GetResultsTime");
_serialize_result_timer = ADD_TIMER(profile(), "SerializeResultTime");
_hash_table_iterate_timer = ADD_TIMER(profile(), "HashTableIterateTime");
_insert_keys_to_column_timer = ADD_TIMER(profile(), "InsertKeysToColumnTime");
_serialize_data_timer = ADD_TIMER(profile(), "SerializeDataTime");
_insert_values_to_column_timer = ADD_TIMER(profile(), "InsertValuesToColumnTime");

_merge_timer = ADD_TIMER(Base::profile(), "MergeTime");
_deserialize_data_timer = ADD_TIMER(Base::profile(), "DeserializeAndMergeTime");
Expand All @@ -58,7 +56,7 @@ Status AggLocalState::init(RuntimeState* state, LocalStateInfo& info) {
std::placeholders::_1, std::placeholders::_2,
std::placeholders::_3);
} else {
_executor.get_result = std::bind<Status>(&AggLocalState::_serialize_without_key, this,
_executor.get_result = std::bind<Status>(&AggLocalState::_get_results_without_key, this,
std::placeholders::_1, std::placeholders::_2,
std::placeholders::_3);
}
Expand All @@ -69,8 +67,8 @@ Status AggLocalState::init(RuntimeState* state, LocalStateInfo& info) {
std::placeholders::_2, std::placeholders::_3);
} else {
_executor.get_result = std::bind<Status>(
&AggLocalState::_serialize_with_serialized_key_result, this,
std::placeholders::_1, std::placeholders::_2, std::placeholders::_3);
&AggLocalState::_get_results_with_serialized_key, this, std::placeholders::_1,
std::placeholders::_2, std::placeholders::_3);
}
}

Expand All @@ -94,18 +92,9 @@ Status AggLocalState::_create_agg_status(vectorized::AggregateDataPtr data) {
return Status::OK();
}

// Calls `destroy` on every aggregate function state reachable from `data`.
//
// For each entry i of the shared state's `aggregate_evaluators`, the evaluator's
// function is asked to destroy the state located at
// `data + offsets_of_aggregate_states[i]`.
//
// @param data base address the per-function state offsets are applied to.
// @return always Status::OK().
Status AggLocalState::_destroy_agg_status(vectorized::AggregateDataPtr data) {
    auto& shared_state = *Base::_shared_state;
    auto& evaluators = shared_state.aggregate_evaluators;
    // Unsigned index: avoids a signed/unsigned comparison against size() and
    // matches the other evaluator loops in this file.
    for (size_t i = 0; i < evaluators.size(); ++i) {
        evaluators[i]->function()->destroy(data + shared_state.offsets_of_aggregate_states[i]);
    }
    return Status::OK();
}

Status AggLocalState::_serialize_with_serialized_key_result(RuntimeState* state,
vectorized::Block* block, bool* eos) {
SCOPED_TIMER(_serialize_result_timer);
Status AggLocalState::_get_results_with_serialized_key(RuntimeState* state,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: function '_get_results_with_serialized_key' exceeds recommended size/complexity thresholds [readability-function-size]

Status AggLocalState::_get_results_with_serialized_key(RuntimeState* state,
                      ^
Additional context

be/src/pipeline/exec/aggregation_source_operator.cpp:94: 115 lines including whitespace and comments (threshold 80)

Status AggLocalState::_get_results_with_serialized_key(RuntimeState* state,
                      ^

vectorized::Block* block, bool* eos) {
SCOPED_TIMER(_get_results_timer);
auto& shared_state = *_shared_state;
size_t key_size = _shared_state->probe_expr_ctxs.size();
size_t agg_size = _shared_state->aggregate_evaluators.size();
Expand All @@ -125,7 +114,6 @@ Status AggLocalState::_serialize_with_serialized_key_result(RuntimeState* state,
}
}

SCOPED_TIMER(_get_results_timer);
std::visit(
vectorized::Overload {
[&](std::monostate& arg) -> void {
Expand Down Expand Up @@ -181,7 +169,7 @@ Status AggLocalState::_serialize_with_serialized_key_result(RuntimeState* state,
}

{
SCOPED_TIMER(_serialize_data_timer);
SCOPED_TIMER(_insert_values_to_column_timer);
for (size_t i = 0; i < shared_state.aggregate_evaluators.size(); ++i) {
value_data_types[i] = shared_state.aggregate_evaluators[i]
->function()
Expand Down Expand Up @@ -333,13 +321,13 @@ Status AggLocalState::_get_with_serialized_key_result(RuntimeState* state, vecto
return Status::OK();
}

Status AggLocalState::_serialize_without_key(RuntimeState* state, vectorized::Block* block,
bool* eos) {
Status AggLocalState::_get_results_without_key(RuntimeState* state, vectorized::Block* block,
bool* eos) {
SCOPED_TIMER(_get_results_timer);
auto& shared_state = *_shared_state;
// 1. `child(0)->rows_returned() == 0` means no data came from the child:
// a level-two aggregation node should return a NULL result, while
// a level-one aggregation node sets `eos = true` and returns directly.
SCOPED_TIMER(_serialize_result_timer);
if (UNLIKELY(_shared_state->input_num_rows == 0)) {
*eos = true;
return Status::OK();
Expand Down Expand Up @@ -573,17 +561,6 @@ template Status AggSourceOperatorX::merge_with_serialized_key_helper<true>(
template Status AggSourceOperatorX::merge_with_serialized_key_helper<false>(
RuntimeState* state, vectorized::Block* block);

// Returns `hash_table->size()` of the aggregation method currently stored in
// the shared state's `agg_data->method_variant`.
//
// @throws doris::Exception with ErrorCode::INTERNAL_ERROR ("uninited hash table")
//         when the variant holds std::monostate.
size_t AggLocalState::_get_hash_table_size() {
    return std::visit(
            vectorized::Overload {
                    // This alternative only throws, so no return statement is needed;
                    // the explicit return type still gives the visitor a size_t result.
                    [](std::monostate&) -> size_t {
                        throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                                               "uninited hash table");
                    },
                    [](auto& agg_method) { return agg_method.hash_table->size(); }},
            _shared_state->agg_data->method_variant);
}

void AggLocalState::_emplace_into_hash_table(vectorized::AggregateDataPtr* places,
vectorized::ColumnRawPtrs& key_columns,
size_t num_rows) {
Expand Down
11 changes: 4 additions & 7 deletions be/src/pipeline/exec/aggregation_source_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,12 @@ class AggLocalState final : public PipelineXLocalState<AggSharedState> {
friend class AggSourceOperatorX;

Status _get_without_key_result(RuntimeState* state, vectorized::Block* block, bool* eos);
Status _serialize_without_key(RuntimeState* state, vectorized::Block* block, bool* eos);
Status _get_results_without_key(RuntimeState* state, vectorized::Block* block, bool* eos);
Status _get_with_serialized_key_result(RuntimeState* state, vectorized::Block* block,
bool* eos);
Status _serialize_with_serialized_key_result(RuntimeState* state, vectorized::Block* block,
bool* eos);
Status _get_results_with_serialized_key(RuntimeState* state, vectorized::Block* block,
bool* eos);
Status _create_agg_status(vectorized::AggregateDataPtr data);
Status _destroy_agg_status(vectorized::AggregateDataPtr data);
void _make_nullable_output_key(vectorized::Block* block) {
if (block->rows() != 0) {
auto& shared_state = *Base ::_shared_state;
Expand All @@ -68,16 +67,14 @@ class AggLocalState final : public PipelineXLocalState<AggSharedState> {
vectorized::ColumnRawPtrs& key_columns, size_t num_rows);
void _emplace_into_hash_table(vectorized::AggregateDataPtr* places,
vectorized::ColumnRawPtrs& key_columns, size_t num_rows);
size_t _get_hash_table_size();

vectorized::PODArray<vectorized::AggregateDataPtr> _places;
std::vector<char> _deserialize_buffer;

RuntimeProfile::Counter* _get_results_timer = nullptr;
RuntimeProfile::Counter* _serialize_result_timer = nullptr;
RuntimeProfile::Counter* _hash_table_iterate_timer = nullptr;
RuntimeProfile::Counter* _insert_keys_to_column_timer = nullptr;
RuntimeProfile::Counter* _serialize_data_timer = nullptr;
RuntimeProfile::Counter* _insert_values_to_column_timer = nullptr;

RuntimeProfile::Counter* _hash_table_compute_timer = nullptr;
RuntimeProfile::Counter* _hash_table_emplace_timer = nullptr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ Status DistinctStreamingAggLocalState::init(RuntimeState* state, LocalStateInfo&
SCOPED_TIMER(Base::exec_time_counter());
SCOPED_TIMER(Base::_init_timer);
_build_timer = ADD_TIMER(Base::profile(), "BuildTime");
_exec_timer = ADD_TIMER(Base::profile(), "ExecTime");
_hash_table_compute_timer = ADD_TIMER(Base::profile(), "HashTableComputeTime");
_hash_table_emplace_timer = ADD_TIMER(Base::profile(), "HashTableEmplaceTime");
_hash_table_input_counter = ADD_COUNTER(Base::profile(), "HashTableInputCount", TUnit::UNIT);
Expand Down
28 changes: 10 additions & 18 deletions be/src/pipeline/exec/streaming_aggregation_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,25 +93,18 @@ Status StreamingAggLocalState::init(RuntimeState* state, LocalStateInfo& info) {
"MemoryUsageSerializeKeyArena", TUnit::BYTES, "", 1);

_build_timer = ADD_TIMER(Base::profile(), "BuildTime");
_build_table_convert_timer = ADD_TIMER(Base::profile(), "BuildConvertToPartitionedTime");
_serialize_key_timer = ADD_TIMER(Base::profile(), "SerializeKeyTime");
_exec_timer = ADD_TIMER(Base::profile(), "ExecTime");
_merge_timer = ADD_TIMER(Base::profile(), "MergeTime");
_expr_timer = ADD_TIMER(Base::profile(), "ExprTime");
_serialize_data_timer = ADD_TIMER(Base::profile(), "SerializeDataTime");
_insert_values_to_column_timer = ADD_TIMER(Base::profile(), "InsertValuesToColumnTime");
_deserialize_data_timer = ADD_TIMER(Base::profile(), "DeserializeAndMergeTime");
_hash_table_compute_timer = ADD_TIMER(Base::profile(), "HashTableComputeTime");
_hash_table_emplace_timer = ADD_TIMER(Base::profile(), "HashTableEmplaceTime");
_hash_table_input_counter = ADD_COUNTER(Base::profile(), "HashTableInputCount", TUnit::UNIT);
_max_row_size_counter = ADD_COUNTER(Base::profile(), "MaxRowSizeInBytes", TUnit::UNIT);
_hash_table_size_counter = ADD_COUNTER(profile(), "HashTableSize", TUnit::UNIT);
_queue_byte_size_counter = ADD_COUNTER(profile(), "MaxSizeInBlockQueue", TUnit::BYTES);
_queue_size_counter = ADD_COUNTER(profile(), "MaxSizeOfBlockQueue", TUnit::UNIT);
_streaming_agg_timer = ADD_TIMER(profile(), "StreamingAggTime");
_build_timer = ADD_TIMER(profile(), "BuildTime");
_expr_timer = ADD_TIMER(Base::profile(), "ExprTime");
_get_results_timer = ADD_TIMER(profile(), "GetResultsTime");
_serialize_result_timer = ADD_TIMER(profile(), "SerializeResultTime");
_hash_table_iterate_timer = ADD_TIMER(profile(), "HashTableIterateTime");
_insert_keys_to_column_timer = ADD_TIMER(profile(), "InsertKeysToColumnTime");

Expand Down Expand Up @@ -679,7 +672,7 @@ Status StreamingAggLocalState::_pre_agg_with_serialized_key(doris::vectorized::B
}

for (int i = 0; i != _aggregate_evaluators.size(); ++i) {
SCOPED_TIMER(_serialize_data_timer);
SCOPED_TIMER(_insert_values_to_column_timer);
RETURN_IF_ERROR(
_aggregate_evaluators[i]->streaming_agg_serialize_to_column(
in_block, value_columns[i], rows,
Expand Down Expand Up @@ -848,12 +841,12 @@ Status StreamingAggLocalState::_get_with_serialized_key_result(RuntimeState* sta
return Status::OK();
}

Status StreamingAggLocalState::_serialize_without_key(RuntimeState* state, vectorized::Block* block,
bool* eos) {
Status StreamingAggLocalState::_get_results_without_key(RuntimeState* state,
yiguolei marked this conversation as resolved.
Show resolved Hide resolved
vectorized::Block* block, bool* eos) {
// 1. `child(0)->rows_returned() == 0` means no data came from the child:
// a level-two aggregation node should return a NULL result, while
// a level-one aggregation node sets `eos = true` and returns directly.
SCOPED_TIMER(_serialize_result_timer);
SCOPED_TIMER(_get_results_timer);
if (UNLIKELY(_input_num_rows == 0)) {
*eos = true;
return Status::OK();
Expand Down Expand Up @@ -892,10 +885,10 @@ Status StreamingAggLocalState::_serialize_without_key(RuntimeState* state, vecto
return Status::OK();
}

Status StreamingAggLocalState::_serialize_with_serialized_key_result(RuntimeState* state,
vectorized::Block* block,
bool* eos) {
SCOPED_TIMER(_serialize_result_timer);
Status StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* state,
yiguolei marked this conversation as resolved.
Show resolved Hide resolved
vectorized::Block* block,
bool* eos) {
SCOPED_TIMER(_get_results_timer);
auto& p = _parent->cast<StreamingAggOperatorX>();
int key_size = _probe_expr_ctxs.size();
int agg_size = _aggregate_evaluators.size();
Expand All @@ -914,7 +907,6 @@ Status StreamingAggLocalState::_serialize_with_serialized_key_result(RuntimeStat
}
}

SCOPED_TIMER(_get_results_timer);
std::visit(
vectorized::Overload {
[&](std::monostate& arg) -> void {
Expand Down Expand Up @@ -970,7 +962,7 @@ Status StreamingAggLocalState::_serialize_with_serialized_key_result(RuntimeStat
}

{
SCOPED_TIMER(_serialize_data_timer);
SCOPED_TIMER(_insert_values_to_column_timer);
for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) {
value_data_types[i] =
_aggregate_evaluators[i]->function()->get_serialized_type();
Expand Down
18 changes: 6 additions & 12 deletions be/src/pipeline/exec/streaming_aggregation_operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ class StreamingAggLocalState final : public PipelineXLocalState<FakeSharedState>
void _update_memusage_with_serialized_key();
Status _init_hash_method(const vectorized::VExprContextSPtrs& probe_exprs);
Status _get_without_key_result(RuntimeState* state, vectorized::Block* block, bool* eos);
Status _serialize_without_key(RuntimeState* state, vectorized::Block* block, bool* eos);
Status _get_results_without_key(RuntimeState* state, vectorized::Block* block, bool* eos);
Status _get_with_serialized_key_result(RuntimeState* state, vectorized::Block* block,
bool* eos);
Status _serialize_with_serialized_key_result(RuntimeState* state, vectorized::Block* block,
bool* eos);
Status _get_results_with_serialized_key(RuntimeState* state, vectorized::Block* block,
bool* eos);

template <bool limit, bool for_spill = false>
Status _merge_with_serialized_key_helper(vectorized::Block* block);
Expand All @@ -83,25 +83,19 @@ class StreamingAggLocalState final : public PipelineXLocalState<FakeSharedState>
Status _create_agg_status(vectorized::AggregateDataPtr data);
size_t _get_hash_table_size();

RuntimeProfile::Counter* _queue_byte_size_counter = nullptr;
RuntimeProfile::Counter* _queue_size_counter = nullptr;
RuntimeProfile::Counter* _streaming_agg_timer = nullptr;
RuntimeProfile::Counter* _hash_table_compute_timer = nullptr;
RuntimeProfile::Counter* _hash_table_emplace_timer = nullptr;
RuntimeProfile::Counter* _hash_table_input_counter = nullptr;
RuntimeProfile::Counter* _build_timer = nullptr;
RuntimeProfile::Counter* _expr_timer = nullptr;
RuntimeProfile::Counter* _build_table_convert_timer = nullptr;
RuntimeProfile::Counter* _serialize_key_timer = nullptr;
RuntimeProfile::Counter* _merge_timer = nullptr;
RuntimeProfile::Counter* _serialize_data_timer = nullptr;
RuntimeProfile::Counter* _insert_values_to_column_timer = nullptr;
RuntimeProfile::Counter* _deserialize_data_timer = nullptr;
RuntimeProfile::Counter* _max_row_size_counter = nullptr;
RuntimeProfile::Counter* _hash_table_memory_usage = nullptr;
RuntimeProfile::HighWaterMarkCounter* _serialize_key_arena_memory_usage = nullptr;
RuntimeProfile::Counter* _hash_table_size_counter = nullptr;
RuntimeProfile::Counter* _get_results_timer = nullptr;
RuntimeProfile::Counter* _serialize_result_timer = nullptr;
RuntimeProfile::Counter* _hash_table_iterate_timer = nullptr;
RuntimeProfile::Counter* _insert_keys_to_column_timer = nullptr;

Expand Down Expand Up @@ -136,13 +130,13 @@ class StreamingAggLocalState final : public PipelineXLocalState<FakeSharedState>
if constexpr (NeedFinalize) {
return local_state->_get_without_key_result(state, block, eos);
} else {
return local_state->_serialize_without_key(state, block, eos);
return local_state->_get_results_without_key(state, block, eos);
}
} else {
if constexpr (NeedFinalize) {
return local_state->_get_with_serialized_key_result(state, block, eos);
} else {
return local_state->_serialize_with_serialized_key_result(state, block, eos);
return local_state->_get_results_with_serialized_key(state, block, eos);
}
}
}
Expand Down
Loading
Loading