Skip to content

Commit

Permalink
More stats and MPI rank for rocm profiles (#635)
Browse files (browse the repository at this point in the history)
* Add min, avg, and max time-per-instance columns to rocm-activity-profile

* Add mpi.rank to rocprofiler activity records
  • Loading branch information
daboehme authored Jan 23, 2025
1 parent 1bfdfb2 commit 2929a36
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 12 deletions.
31 changes: 21 additions & 10 deletions src/caliper/controllers/ROCmActivityProfileController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ class RocmActivityProfileController : public cali::ChannelController
output.append(ext);
}

const char* local_let =
"act_count=first(sum#count,count) if rocm.activity"
",dmin=scale(min#rocm.activity.duration,1e-9)"
",davg=scale(avg#rocm.activity.duration,1e-9)"
",dmax=scale(max#rocm.activity.duration,1e-9)";
const char* local_select =
"*,scale(sum#time.duration.ns,1e-9) as time"
",scale(sum#rocm.activity.duration,1e-9) as \"time (gpu)\""
",min(dmin) as \"min time/inst\""
",avg(davg) as \"avg time/inst\""
",max(dmax) as \"max time/inst\""
",sum(act_count) as count";

auto avail_services = services::get_available_services();
bool have_mpi = std::find(avail_services.begin(), avail_services.end(), "mpireport") != avail_services.end();
bool have_adiak =
Expand All @@ -63,22 +76,20 @@ class RocmActivityProfileController : public cali::ChannelController
config()["CALI_MPIREPORT_WRITE_ON_FINALIZE"] = "false";
config()["CALI_MPIREPORT_CONFIG"] = opts.build_query(
"local",
{ { "select",
"*,scale(sum#rocm.activity.duration,1e-9) as \"time (gpu)\" unit sec"
" ,scale(sum#time.duration.ns,1e-9) as \"time\" unit sec" },
{ "group by", "path,rocm.kernel.name,rocm.activity.kind,mpi.rank" },
{ "format", format } }
{ { "let", local_let },
{ "select", local_select },
{ "group by", "path,rocm.kernel.name,rocm.activity.kind,mpi.rank" },
{ "format", format } }
);
} else {
config()["CALI_SERVICES_ENABLE"].append(",report");
config()["CALI_REPORT_FILENAME"] = output;
config()["CALI_REPORT_CONFIG"] = opts.build_query(
"local",
{ { "select",
"*,scale(sum#rocm.activity.duration,1e-9) as \"time (gpu)\" unit sec"
" ,scale(sum#time.duration.ns,1e-9) as \"time\" unit sec" },
{ "group by", "path,rocm.kernel.name,rocm.activity.kind" },
{ "format", format } }
{ { "let", local_let },
{ "select", local_select },
{ "group by", "path,rocm.kernel.name,rocm.activity.kind" },
{ "format", format } }
);
}

Expand Down
18 changes: 16 additions & 2 deletions src/services/rocprofiler/RocProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,14 @@ class RocProfilerService
Caliper c;
c.begin(s_instance->m_flush_region_attr, Variant("ROCPROFILER FLUSH"));

Entry mpi_rank_entry;

{
Attribute mpi_rank_attr = c.get_attribute("mpi.rank");
if (mpi_rank_attr)
mpi_rank_entry = c.get(mpi_rank_attr);
}

for (size_t i = 0; i < num_headers; ++i) {
auto* header = headers[i];

Expand Down Expand Up @@ -216,8 +224,11 @@ class RocProfilerService

cali::Node* correlation = static_cast<cali::Node*>(record->correlation_id.external.ptr);

FixedSizeSnapshotRecord<6> snapshot;
FixedSizeSnapshotRecord<8> snapshot;
c.make_record(6, attr, data, snapshot.builder(), correlation);
if (!mpi_rank_entry.empty())
snapshot.builder().append(mpi_rank_entry);

s_instance->m_channel.events()
.process_snapshot(&c, &s_instance->m_channel, SnapshotView(), snapshot.view());

Expand Down Expand Up @@ -253,8 +264,11 @@ class RocProfilerService

cali::Node* correlation = static_cast<cali::Node*>(record->correlation_id.external.ptr);

FixedSizeSnapshotRecord<6> snapshot;
FixedSizeSnapshotRecord<8> snapshot;
c.make_record(6, attr, data, snapshot.builder(), correlation);
if (!mpi_rank_entry.empty())
snapshot.builder().append(mpi_rank_entry);

s_instance->m_channel.events()
.process_snapshot(&c, &s_instance->m_channel, SnapshotView(), snapshot.view());

Expand Down

0 comments on commit 2929a36

Please sign in to comment.