Skip to content

Commit cf45b73

Browse files
committed
Dynamically detect PMU capabilities through libpfm
- Instead of allowing for up to 3 counters, libpfm's internal capabilities of reporting PMU info are used to manage a per-PMU "registry" and dynamically allocate "slots" according to the specific counters requested. - Per-PMU information is obtained, where each PMU reports its own capabilities in the form of fixed/non-fixed counter limits. - In this PR/commit, it is *still* not possible to get more detailed (x86-only) counter information in terms of fixed/non-fixed counter association, due to what seems to be a lack of API surface in libpfm itself: https://sourceforge.net/p/perfmon2/mailman/message/37631173/ - The maximal number of counters is bumped from 3 to 63, which together with the current padding "scheme" means we pre-allocate/inline up to 64 counter slots (64 bits each) per measurement instance. - Closes #1377
1 parent 60b16f1 commit cf45b73

File tree

2 files changed

+134
-26
lines changed

2 files changed

+134
-26
lines changed

src/perf_counters.cc

+127-20
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,130 @@
1919
#include <vector>
2020

2121
#if defined HAVE_LIBPFM
22+
#include <unordered_map>
2223
#include "perfmon/pfmlib.h"
2324
#include "perfmon/pfmlib_perf_event.h"
2425
#endif
2526

2627
namespace benchmark {
2728
namespace internal {
2829

29-
constexpr size_t PerfCounterValues::kMaxCounters;
30-
3130
#if defined HAVE_LIBPFM
31+
32+
class SinglePMURegistry {
33+
public:
34+
~SinglePMURegistry() = default;
35+
SinglePMURegistry(SinglePMURegistry&&) = default;
36+
SinglePMURegistry(const SinglePMURegistry&) = delete;
37+
SinglePMURegistry& operator=(SinglePMURegistry&&) noexcept;
38+
SinglePMURegistry& operator=(const SinglePMURegistry&) = delete;
39+
40+
SinglePMURegistry(pfm_pmu_t pmu_id)
41+
: pmu_id_(pmu_id), available_counters_(0), available_fixed_counters_(0) {
42+
{
43+
pfm_pmu_info_t pmu_info{};
44+
const auto pfm_pmu = pfm_get_pmu_info(pmu_id, &pmu_info);
45+
46+
if (pfm_pmu != PFM_SUCCESS) {
47+
GetErrorLogInstance() << "Unknown pmu: " << pmu_id << "\n";
48+
return;
49+
}
50+
51+
name_ = pmu_info.name;
52+
desc_ = pmu_info.desc;
53+
available_counters_ = pmu_info.num_cntrs;
54+
available_fixed_counters_ = pmu_info.num_fixed_cntrs;
55+
56+
BM_VLOG(1) << "PMU: " << pmu_id << " " << name_ << " " << desc_ << "\n";
57+
BM_VLOG(1) << " counters: " << available_counters_ << " fixed: " << available_fixed_counters_ << "\n";
58+
}
59+
}
60+
61+
const char* name() const { return name_; }
62+
63+
bool AddCounter(int event_id) {
64+
pfm_event_info_t info{};
65+
const auto pfm_event_info =
66+
pfm_get_event_info(event_id, PFM_OS_PERF_EVENT, &info);
67+
68+
if (pfm_event_info != PFM_SUCCESS) {
69+
GetErrorLogInstance() << "Unknown event id: " << event_id << "\n";
70+
return false;
71+
}
72+
73+
assert(info.pmu == pmu_id_);
74+
75+
if (counter_ids_.find(event_id) != counter_ids_.end()) return true;
76+
77+
assert(std::numeric_limits<int>::max() > counter_ids_.size());
78+
if (static_cast<int>(counter_ids_.size()) >= available_counters_ - 1) {
79+
GetErrorLogInstance() << "Maximal number of counters for PMU " << name_
80+
<< " (" << available_counters_ << ") reached.\n";
81+
return false;
82+
}
83+
84+
counter_ids_.emplace(event_id, info.code);
85+
86+
BM_VLOG(2) << "Registered counter: " << event_id << " (" << info.name << " - " << info.desc
87+
<< ") in pmu " << name_ << " (" << counter_ids_.size() << "/" << available_counters_ << "\n";
88+
89+
return true;
90+
}
91+
92+
private:
93+
pfm_pmu_t pmu_id_;
94+
const char* name_;
95+
const char* desc_;
96+
std::unordered_map<int, uint64_t> counter_ids_;
97+
std::unordered_map<int, uint64_t> fixed_counter_ids_;
98+
int available_counters_;
99+
int available_fixed_counters_;
100+
};
101+
102+
class PMURegistry {
103+
public:
104+
~PMURegistry() = default;
105+
PMURegistry(PMURegistry&&) = default;
106+
PMURegistry(const PMURegistry&) = delete;
107+
PMURegistry& operator=(PMURegistry&&) noexcept;
108+
PMURegistry& operator=(const PMURegistry&) = delete;
109+
PMURegistry() {}
110+
111+
bool EnlistCounter(const std::string& name, struct perf_event_attr &attr_base) {
112+
attr_base.size = sizeof(attr_base);
113+
pfm_perf_encode_arg_t encoding{};
114+
encoding.attr = &attr_base;
115+
116+
const auto pfm_get = pfm_get_os_event_encoding(
117+
name.c_str(), PFM_PLM3, PFM_OS_PERF_EVENT, &encoding);
118+
if (pfm_get != PFM_SUCCESS) {
119+
GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
120+
return false;
121+
}
122+
123+
pfm_event_info_t info{};
124+
const auto pfm_info =
125+
pfm_get_event_info(encoding.idx, PFM_OS_PERF_EVENT, &info);
126+
if (pfm_info != PFM_SUCCESS) {
127+
GetErrorLogInstance()
128+
<< "Unknown counter idx: " << encoding.idx << "(" << name << ")\n";
129+
return false;
130+
}
131+
132+
// Spin-up a new per-PMU sub-registry if needed
133+
if (pmu_registry_.find(info.pmu) == pmu_registry_.end()) {
134+
pmu_registry_.emplace(info.pmu, SinglePMURegistry(info.pmu));
135+
}
136+
137+
auto& single_pmu = pmu_registry_.find(info.pmu)->second;
138+
139+
return single_pmu.AddCounter(info.idx);
140+
}
141+
142+
private:
143+
std::unordered_map<pfm_pmu_t, SinglePMURegistry> pmu_registry_;
144+
};
145+
32146
const bool PerfCounters::kSupported = true;
33147

34148
bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; }
@@ -38,35 +152,28 @@ PerfCounters PerfCounters::Create(
38152
if (counter_names.empty()) {
39153
return NoCounters();
40154
}
41-
if (counter_names.size() > PerfCounterValues::kMaxCounters) {
42-
GetErrorLogInstance()
43-
<< counter_names.size()
44-
<< " counters were requested. The minimum is 1, the maximum is "
45-
<< PerfCounterValues::kMaxCounters << "\n";
46-
return NoCounters();
47-
}
155+
48156
std::vector<int> counter_ids(counter_names.size());
157+
PMURegistry registry{};
49158

50-
const int mode = PFM_PLM3; // user mode only
51159
for (size_t i = 0; i < counter_names.size(); ++i) {
52-
const bool is_first = i == 0;
53-
struct perf_event_attr attr {};
54-
attr.size = sizeof(attr);
55-
const int group_id = !is_first ? counter_ids[0] : -1;
56160
const auto& name = counter_names[i];
57161
if (name.empty()) {
58162
GetErrorLogInstance() << "A counter name was the empty string\n";
59163
return NoCounters();
60164
}
61-
pfm_perf_encode_arg_t arg{};
62-
arg.attr = &attr;
63165

64-
const int pfm_get =
65-
pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
66-
if (pfm_get != PFM_SUCCESS) {
67-
GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
166+
struct perf_event_attr attr {};
167+
auto ok = registry.EnlistCounter(name, attr);
168+
169+
if (!ok) {
170+
GetErrorLogInstance() << "Failed to register counter: " << name << "\n";
68171
return NoCounters();
69172
}
173+
174+
const bool is_first = i == 0;
175+
const int group_id = !is_first ? counter_ids[0] : -1;
176+
70177
attr.disabled = is_first;
71178
// Note: the man page for perf_event_create suggests inerit = true and
72179
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the

src/perf_counters.h

+7-6
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,14 @@ namespace internal {
4949
// operator[]) of this object.
5050
class PerfCounterValues {
5151
public:
52-
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
53-
BM_CHECK_LE(nr_counters_, kMaxCounters);
52+
explicit PerfCounterValues(int nr_counters)
53+
: nr_counters_(nr_counters)
54+
{
55+
BM_CHECK_LE(nr_counters_, kMaxPreAllocatedCounters);
5456
}
55-
5657
uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
5758

58-
static constexpr size_t kMaxCounters = 3;
59+
static constexpr size_t kMaxPreAllocatedCounters = 63;
5960

6061
private:
6162
friend class PerfCounters;
@@ -67,8 +68,8 @@ class PerfCounterValues {
6768
}
6869

6970
static constexpr size_t kPadding = 1;
70-
std::array<uint64_t, kPadding + kMaxCounters> values_;
71-
const size_t nr_counters_;
71+
std::array<uint64_t, kPadding + kMaxPreAllocatedCounters> values_;
72+
const int nr_counters_;
7273
};
7374

7475
// Collect PMU counters. The object, once constructed, is ready to be used by

0 commit comments

Comments
 (0)