Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions onnxruntime/core/common/profiler.cc
Original file line number Diff line number Diff line change
@@ -33,14 +33,15 @@ void Profiler::StartProfiling(const std::string& file_name) {
void Profiler::EndTimeAndRecordEvent(EventCategory category,
const std::string& event_name,
TimePoint& start_time,
- std::unordered_map<std::string, std::string>&& event_args,
+ const std::initializer_list<std::pair<std::string, std::string>>& event_args,
bool /*sync_gpu*/) {
if (!enabled_ && !profile_with_logger_)
return;
long long dur = TimeDiffMicroSeconds(start_time);
long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time);

EventRecord event(category, logging::GetProcessId(),
- logging::GetThreadId(), event_name, ts, dur, std::move(event_args));
+ logging::GetThreadId(), event_name, ts, dur, { event_args.begin(), event_args.end() });
if (profile_with_logger_) {
custom_logger_->SendProfileEvent(event);
} else {
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/common/profiler.h
Original file line number Diff line number Diff line change
@@ -4,6 +4,8 @@
#pragma once
#include <iostream>
#include <fstream>
+ #include <tuple>
+ #include <initializer_list>
#include "core/common/logging/logging.h"

namespace onnxruntime {
@@ -45,7 +47,7 @@ class Profiler {
void EndTimeAndRecordEvent(EventCategory category,
const std::string& event_name,
TimePoint& start_time,
- std::unordered_map<std::string, std::string>&& event_args = std::unordered_map<std::string, std::string>(),
+ const std::initializer_list<std::pair<std::string, std::string>>& event_args = {},
bool sync_gpu = false);

/*
Expand Down
7 changes: 3 additions & 4 deletions onnxruntime/core/framework/parallel_executor.cc
Original file line number Diff line number Diff line change
@@ -147,8 +147,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_before",
sync_time_begin,
- std::unordered_map<std::string,
- std::string>{{"op_name", op_name}});
+ {{"op_name", op_name}});

// call compute on the kernel
VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name();
@@ -164,7 +163,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_kernel_time",
kernel_begin_time,
- std::unordered_map<std::string, std::string>{{"op_name", op_name}});
+ {{"op_name", op_name}});

sync_time_begin = session_state.Profiler().StartTime();
// sync after compute for outputs
@@ -191,7 +190,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_after",
sync_time_begin,
- std::unordered_map<std::string, std::string>{{"op_name", op_name}});
+ {{"op_name", op_name}});

//std::cout << "Run async node finish: " << p_node_index << std::endl;

Expand Down
7 changes: 3 additions & 4 deletions onnxruntime/core/framework/sequential_executor.cc
Original file line number Diff line number Diff line change
@@ -92,8 +92,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_before",
sync_time_begin,
- std::unordered_map<std::string,
- std::string>{{"op_name", op_name}});
+ {{"op_name", op_name}});

// call compute on the kernel
VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name();
@@ -103,7 +102,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_kernel_time",
kernel_begin_time,
- std::unordered_map<std::string, std::string>{{"op_name", op_name}});
+ {{"op_name", op_name}});

sync_time_begin = session_state.Profiler().StartTime();
// sync after compute for outputs
@@ -130,7 +129,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_after",
sync_time_begin,
- std::unordered_map<std::string, std::string>{{"op_name", op_name}});
+ {{"op_name", op_name}});

// free ml-values corresponding to this node
VLOGS(logger, 1) << "Releasing node ML values after computing kernel: " << p_op_kernel->Node().Name();
Expand Down