perf/x86: Support counter mask
[ Upstream commit 722e42e ]

The current perf code assumes that both GP and fixed counters are
contiguous. But that is not guaranteed on newer Intel platforms or in a
virtualization environment.

Use a counter mask to replace the number of counters for both GP and
fixed counters. For other architectures or older platforms that don't
support a counter mask, use GENMASK_ULL(num_counter - 1, 0) as a
drop-in replacement. There is no functional change for them.

The interface to KVM is not changed: the number of counters is still
passed to KVM. It can be updated separately later.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Link: https://lkml.kernel.org/r/20240626143545.480761-3-kan.liang@linux.intel.com
Stable-dep-of: f73cefa ("perf/x86: Fix smp_processor_id()-in-preemptible warnings")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Kan Liang authored and gregkh committed Aug 12, 2024
1 parent e6fa0f2 commit 4abc98f
Showing 9 changed files with 199 additions and 179 deletions.
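
The pattern this commit applies throughout the diff below, as a minimal
userspace sketch: a mask generalizes a plain counter count, because walking
set bits works whether or not the counters are contiguous. GENMASK_ULL() and
the set-bit walk are reimplemented here as stand-ins for the kernel macros,
and the sparse mask value is a made-up example.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's GENMASK_ULL(h, l): bits l..h set. */
#define GENMASK_ULL(h, l) \
	((~0ULL >> (63 - (h))) & ~((1ULL << (l)) - 1))

int main(void)
{
	/* Old scheme: num_counters = 6 implies contiguous indices 0..5. */
	uint64_t cntr_mask = GENMASK_ULL(6 - 1, 0);	/* 0x3f */

	/* A mask can also describe holes, e.g. a hypothetical layout
	 * exposing only counters 0, 1, 3 and 5. */
	uint64_t sparse_mask = 0x2b;

	/* Stand-in for for_each_set_bit(i, mask, X86_PMC_IDX_MAX). */
	for (int i = 0; i < 64; i++)
		if (sparse_mask & (1ULL << i))
			printf("counter %d present\n", i);

	printf("GP counters in the contiguous mask: %d\n",
	       __builtin_popcountll(cntr_mask));
	return 0;
}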
24 changes: 12 additions & 12 deletions arch/x86/events/amd/core.c
@@ -432,7 +432,7 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
* be removed on one CPU at a time AND PMU is disabled
* when we come here
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct perf_event *tmp = event;

if (try_cmpxchg(nb->owners + i, &tmp, NULL))
@@ -501,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
if (new == -1 || hwc->idx == idx)
/* assign free slot, prefer hwc->idx */
old = cmpxchg(nb->owners + idx, NULL, event);
@@ -544,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
/*
* initialize all possible NB constraints
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
__set_bit(i, nb->event_constraints[i].idxmsk);
nb->event_constraints[i].weight = 1;
}
@@ -737,7 +737,7 @@ static void amd_pmu_check_overflow(void)
* counters are always enabled when this function is called and
* ARCH_PERFMON_EVENTSEL_INT is always set.
*/
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;

@@ -757,7 +757,7 @@ static void amd_pmu_enable_all(int added)

amd_brs_enable_all();

- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
/* only activate events which are marked as active */
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -980,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
/* Clear any reserved bits set by buggy microcode */
status &= amd_pmu_global_cntr_mask;

- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;

@@ -1315,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.addr_offset = amd_pmu_addr_offset,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = AMD64_NUM_COUNTERS,
+ .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
.add = amd_pmu_add_event,
.del = amd_pmu_del_event,
.cntval_bits = 48,
@@ -1414,7 +1414,7 @@ static int __init amd_core_pmu_init(void)
*/
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
- x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);

/* Check for Performance Monitoring v2 support */
if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1424,9 +1424,9 @@ static int __init amd_core_pmu_init(void)
x86_pmu.version = 2;

/* Find the number of available Core PMCs */
- x86_pmu.num_counters = ebx.split.num_core_pmc;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);

- amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+ amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;

/* Update PMC handling functions */
x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1454,12 +1454,12 @@ static int __init amd_core_pmu_init(void)
* even numbered counter that has a consecutive adjacent odd
* numbered counter following it.
*/
- for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+ for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
even_ctr_mask |= BIT_ULL(i);

pair_constraint = (struct event_constraint)
__EVENT_CONSTRAINT(0, even_ctr_mask, 0,
- x86_pmu.num_counters / 2, 0,
+ x86_pmu_max_num_counters(NULL) / 2, 0,
PERF_X86_EVENT_PAIR);

x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
98 changes: 47 additions & 51 deletions arch/x86/events/core.c
@@ -189,53 +189,57 @@ static DEFINE_MUTEX(pmc_reserve_mutex);

#ifdef CONFIG_X86_LOCAL_APIC

- static inline int get_possible_num_counters(void)
+ static inline u64 get_possible_counter_mask(void)
{
- int i, num_counters = x86_pmu.num_counters;
+ u64 cntr_mask = x86_pmu.cntr_mask64;
+ int i;

if (!is_hybrid())
- return num_counters;
+ return cntr_mask;

for (i = 0; i < x86_pmu.num_hybrid_pmus; i++)
- num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters);
+ cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64;

- return num_counters;
+ return cntr_mask;
}

static bool reserve_pmc_hardware(void)
{
- int i, num_counters = get_possible_num_counters();
+ u64 cntr_mask = get_possible_counter_mask();
+ int i, end;

- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}

- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}

return true;

eventsel_fail:
- for (i--; i >= 0; i--)
+ end = i;
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
release_evntsel_nmi(x86_pmu_config_addr(i));

- i = num_counters;
+ i = X86_PMC_IDX_MAX;

perfctr_fail:
- for (i--; i >= 0; i--)
+ end = i;
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, end)
release_perfctr_nmi(x86_pmu_event_addr(i));

return false;
}

static void release_pmc_hardware(void)
{
- int i, num_counters = get_possible_num_counters();
+ u64 cntr_mask = get_possible_counter_mask();
+ int i;

- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) {
release_perfctr_nmi(x86_pmu_event_addr(i));
release_evntsel_nmi(x86_pmu_config_addr(i));
}
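
The failure unwind above is the subtle part of the conversion: instead of
counting down from the failing index, the new code re-walks the set bits
bounded by the index that failed, so only counters that were actually
reserved get released. A minimal userspace sketch of that shape, with
hypothetical reserve_one()/release_one() standing in for
reserve_perfctr_nmi()/release_perfctr_nmi():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool reserve_one(int idx) { return idx != 3; }	/* pretend idx 3 fails */
static void release_one(int idx) { printf("released %d\n", idx); }

static bool reserve_all(uint64_t mask)
{
	int i, end;

	for (i = 0; i < 64; i++) {	/* ~for_each_set_bit(i, &mask, 64) */
		if (!(mask & (1ULL << i)))
			continue;
		if (!reserve_one(i))
			goto fail;
	}
	return true;

fail:
	end = i;			/* bound the walk at the failing index */
	for (i = 0; i < end; i++)	/* ~for_each_set_bit(i, &mask, end) */
		if (mask & (1ULL << i))
			release_one(i);
	return false;
}

int main(void)
{
	return reserve_all(0x2b) ? 0 : 1;	/* bits 0,1,3,5; fails at 3 */
}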
@@ -248,7 +252,8 @@ static void release_pmc_hardware(void) {}

#endif

- bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
+ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
+ unsigned long *fixed_cntr_mask)
{
u64 val, val_fail = -1, val_new= ~0;
int i, reg, reg_fail = -1, ret = 0;
@@ -259,7 +264,7 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
* Check to see if the BIOS enabled any of the counters, if so
* complain and bail.
*/
- for (i = 0; i < num_counters; i++) {
+ for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
@@ -273,12 +278,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed)
}
}

- if (num_counters_fixed) {
+ if (*(u64 *)fixed_cntr_mask) {
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
- for (i = 0; i < num_counters_fixed; i++) {
+ for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(i, pmu))
continue;
if (val & (0x03ULL << i*4)) {
@@ -679,7 +684,7 @@ void x86_pmu_disable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;

- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
u64 val;

@@ -736,7 +741,7 @@ void x86_pmu_enable_all(int added)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx;

- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
struct hw_perf_event *hwc = &cpuc->events[idx]->hw;

if (!test_bit(idx, cpuc->active_mask))
@@ -975,7 +980,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events);

int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
- int num_counters = hybrid(cpuc->pmu, num_counters);
struct event_constraint *c;
struct perf_event *e;
int n0, i, wmin, wmax, unsched = 0;
@@ -1051,7 +1055,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)

/* slow path */
if (i != n) {
- int gpmax = num_counters;
+ int gpmax = x86_pmu_max_num_counters(cpuc->pmu);

/*
* Do not allow scheduling of more than half the available
@@ -1072,7 +1076,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
* the extra Merge events needed by large increment events.
*/
if (x86_pmu.flags & PMU_FL_PAIR) {
- gpmax = num_counters - cpuc->n_pair;
+ gpmax -= cpuc->n_pair;
WARN_ON(gpmax <= 0);
}

@@ -1157,12 +1161,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
*/
static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
{
- int num_counters = hybrid(cpuc->pmu, num_counters);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
struct perf_event *event;
int n, max_count;

- max_count = num_counters + num_counters_fixed;
+ max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu);

/* current number of events already accepted */
n = cpuc->n_events;
@@ -1522,13 +1524,13 @@ void perf_event_print_debug(void)
u64 pebs, debugctl;
int cpu = smp_processor_id();
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- int num_counters = hybrid(cpuc->pmu, num_counters);
- int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed);
+ unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask);
+ unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask);
struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints);
unsigned long flags;
int idx;

- if (!num_counters)
+ if (!*(u64 *)cntr_mask)
return;

local_irq_save(flags);
@@ -1555,7 +1557,7 @@ void perf_event_print_debug(void)
}
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);

- for (idx = 0; idx < num_counters; idx++) {
+ for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
rdmsrl(x86_pmu_event_addr(idx), pmc_count);

@@ -1568,7 +1570,7 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left);
}
- for (idx = 0; idx < num_counters_fixed; idx++) {
+ for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(idx, cpuc->pmu))
continue;
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
@@ -1682,7 +1684,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);

- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;

@@ -2038,18 +2040,15 @@ static void _x86_pmu_read(struct perf_event *event)
static_call(x86_pmu_update)(event);
}

- void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
- u64 intel_ctrl)
+ void x86_pmu_show_pmu_cap(struct pmu *pmu)
{
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", num_counters);
pr_info("... generic registers: %d\n", x86_pmu_num_counters(pmu));
pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
pr_info("... max period: %016Lx\n", x86_pmu.max_period);
pr_info("... fixed-purpose events: %lu\n",
hweight64((((1ULL << num_counters_fixed) - 1)
<< INTEL_PMC_IDX_FIXED) & intel_ctrl));
pr_info("... event mask: %016Lx\n", intel_ctrl);
pr_info("... fixed-purpose events: %d\n", x86_pmu_num_counters_fixed(pmu));
pr_info("... event mask: %016Lx\n", hybrid(pmu, intel_ctrl));
}

static int __init init_hw_perf_events(void)
@@ -2086,7 +2085,7 @@ static int __init init_hw_perf_events(void)
pmu_check_apic();

/* sanity check that the hardware exists or is emulated */
- if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed))
+ if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask))
goto out_bad_pmu;

pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -2097,14 +2096,14 @@ static int __init init_hw_perf_events(void)
quirk->func();

if (!x86_pmu.intel_ctrl)
- x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+ x86_pmu.intel_ctrl = x86_pmu.cntr_mask64;

perf_events_lapic_init();
register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");

unconstrained = (struct event_constraint)
- __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters, 0, 0);
+ __EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64,
+ 0, x86_pmu_num_counters(NULL), 0, 0);

x86_pmu_format_group.attrs = x86_pmu.format_attrs;

@@ -2113,11 +2112,8 @@ static int __init init_hw_perf_events(void)

pmu.attr_update = x86_pmu.attr_update;

- if (!is_hybrid()) {
- x86_pmu_show_pmu_cap(x86_pmu.num_counters,
- x86_pmu.num_counters_fixed,
- x86_pmu.intel_ctrl);
- }
+ if (!is_hybrid())
+ x86_pmu_show_pmu_cap(NULL);

if (!x86_pmu.read)
x86_pmu.read = _x86_pmu_read;
@@ -2481,7 +2477,7 @@ void perf_clear_dirty_counters(void)
for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
if (i >= INTEL_PMC_IDX_FIXED) {
/* Metrics and fake events don't have corresponding HW counters. */
- if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+ if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
continue;

wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
@@ -2986,8 +2982,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
* base PMU holds the correct number of counters for P-cores.
*/
cap->version = x86_pmu.version;
- cap->num_counters_gp = x86_pmu.num_counters;
- cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+ cap->num_counters_gp = x86_pmu_num_counters(NULL);
+ cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL);
cap->bit_width_gp = x86_pmu.cntval_bits;
cap->bit_width_fixed = x86_pmu.cntval_bits;
cap->events_mask = (unsigned int)x86_pmu.events_maskl;
(Diff truncated: the remaining seven changed files are not shown.)
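
For reference, the x86_pmu_num_counters() and x86_pmu_max_num_counters()
helpers used throughout the diff are added in arch/x86/events/perf_event.h,
which is among the files not shown here. A sketch of their presumed shape,
assuming they reduce to a population count and a highest-set-bit over the
(possibly hybrid) counter mask:

#include <stdint.h>

/* Presumed shape only -- the real helpers are hybrid()-aware and live
 * in arch/x86/events/perf_event.h. */
static inline int x86_pmu_num_counters_sketch(uint64_t cntr_mask64)
{
	return __builtin_popcountll(cntr_mask64);	/* ~hweight64() */
}

static inline int x86_pmu_max_num_counters_sketch(uint64_t cntr_mask64)
{
	/* Highest set bit + 1 (~fls64()); with holes in the mask this can
	 * exceed the population count. */
	return cntr_mask64 ? 64 - __builtin_clzll(cntr_mask64) : 0;
}

The distinction shows up in the scheduling hunks above: gpmax (the scheduler
bound) comes from the max-index helper, while max_count (how many events can
be collected) comes from the population count.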