diff --git a/onnxruntime/core/common/profiler.cc b/onnxruntime/core/common/profiler.cc index cc3c87d0c7701..e631a3d173c2f 100644 --- a/onnxruntime/core/common/profiler.cc +++ b/onnxruntime/core/common/profiler.cc @@ -33,14 +33,15 @@ void Profiler::StartProfiling(const std::string& file_name) { void Profiler::EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - std::unordered_map&& event_args, + const std::initializer_list>& event_args, bool /*sync_gpu*/) { if (!enabled_ && !profile_with_logger_) return; long long dur = TimeDiffMicroSeconds(start_time); long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time); + EventRecord event(category, logging::GetProcessId(), - logging::GetThreadId(), event_name, ts, dur, std::move(event_args)); + logging::GetThreadId(), event_name, ts, dur, { event_args.begin(), event_args.end() }); if (profile_with_logger_) { custom_logger_->SendProfileEvent(event); } else { diff --git a/onnxruntime/core/common/profiler.h b/onnxruntime/core/common/profiler.h index 8a1e7daa5ef23..6811a0d890fd9 100644 --- a/onnxruntime/core/common/profiler.h +++ b/onnxruntime/core/common/profiler.h @@ -4,6 +4,8 @@ #pragma once #include #include +#include +#include #include "core/common/logging/logging.h" namespace onnxruntime { @@ -45,7 +47,7 @@ class Profiler { void EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - std::unordered_map&& event_args = std::unordered_map(), + const std::initializer_list>& event_args = {}, bool sync_gpu = false); /* diff --git a/onnxruntime/core/framework/parallel_executor.cc b/onnxruntime/core/framework/parallel_executor.cc index 367056ac90740..72d416947c0a4 100644 --- a/onnxruntime/core/framework/parallel_executor.cc +++ b/onnxruntime/core/framework/parallel_executor.cc @@ -147,8 +147,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_fence_before", sync_time_begin, - std::unordered_map{{"op_name", op_name}}); + {{"op_name", op_name}}); // call compute on the kernel VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name(); @@ -164,7 +163,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_kernel_time", kernel_begin_time, - std::unordered_map{{"op_name", op_name}}); + {{"op_name", op_name}}); sync_time_begin = session_state.Profiler().StartTime(); // sync after compute for outputs @@ -191,7 +190,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_fence_after", sync_time_begin, - std::unordered_map{{"op_name", op_name}}); + {{"op_name", op_name}}); //std::cout << "Run async node finish: " << p_node_index << std::endl; diff --git a/onnxruntime/core/framework/sequential_executor.cc b/onnxruntime/core/framework/sequential_executor.cc index 366fde05e843c..f08380af33c65 100644 --- a/onnxruntime/core/framework/sequential_executor.cc +++ b/onnxruntime/core/framework/sequential_executor.cc @@ -92,8 +92,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_fence_before", sync_time_begin, - std::unordered_map{{"op_name", op_name}}); + {{"op_name", op_name}}); // call compute on the kernel VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name(); @@ -103,7 +102,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_kernel_time", kernel_begin_time, - std::unordered_map{{"op_name", op_name}}); + {{"op_name", op_name}}); sync_time_begin = session_state.Profiler().StartTime(); // sync after compute for outputs @@ -130,7 +129,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_fence_after", sync_time_begin, - std::unordered_map{{"op_name", op_name}}); + {{"op_name", op_name}}); // free ml-values corresponding to this node VLOGS(logger, 1) << "Releasing node ML values after computing kernel: " << p_op_kernel->Node().Name();