diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4f53d0b97539b..7118b5c0c6419 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_PPC_POWERNV)		+= imc-pmu.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
-obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
+obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o vpa-pmu.o
 
 obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
 
diff --git a/arch/powerpc/perf/vpa-pmu.c b/arch/powerpc/perf/vpa-pmu.c
new file mode 100644
index 0000000000000..e5a027f4313b2
--- /dev/null
+++ b/arch/powerpc/perf/vpa-pmu.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Perf interface to expose Virtual Processor Area (VPA) counters.
+ *
+ * Copyright (C) 2024 Kajol Jain, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "vpa-pmu: " fmt
+
+#include <linux/perf_event.h>
+#include <asm/dtl.h>
+#include <asm/smp.h>
+
+#define EVENT(_name, _code)	enum{_name = _code}
+
+/*
+ * Dispatch Trace Log (DTL) event codes.
+ */
+EVENT(DTL_CEDE,		0x1);
+EVENT(DTL_PREEMPT,	0x2);
+EVENT(DTL_FAULT,	0x3);
+EVENT(DTL_ALL,		0x4);
+
+GENERIC_EVENT_ATTR(dtl_cede, DTL_CEDE);
+GENERIC_EVENT_ATTR(dtl_preempt, DTL_PREEMPT);
+GENERIC_EVENT_ATTR(dtl_fault, DTL_FAULT);
+GENERIC_EVENT_ATTR(dtl_all, DTL_ALL);
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *events_attr[] = {
+	GENERIC_EVENT_PTR(DTL_CEDE),
+	GENERIC_EVENT_PTR(DTL_PREEMPT),
+	GENERIC_EVENT_PTR(DTL_FAULT),
+	GENERIC_EVENT_PTR(DTL_ALL),
+	NULL
+};
+
+static const struct attribute_group event_group = {
+	.name = "events",
+	.attrs = events_attr,
+};
+
+static struct attribute *format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static const struct attribute_group format_group = {
+	.name = "format",
+	.attrs = format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&format_group,
+	&event_group,
+	NULL,
+};
+
+/*
+ * Dispatch trace log event enable masks, defined in asm/dtl.h:
+ *   DTL_LOG_CEDE:    voluntary virtual processor waits
+ *   DTL_LOG_PREEMPT: time-slice preempts
+ *   DTL_LOG_FAULT:   virtual partition memory page faults
+ *   DTL_LOG_ALL:     DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT
+ */
+static const u8 vpa_dtl_enable_mask[] = {
+	[DTL_CEDE]	= DTL_LOG_CEDE,
+	[DTL_PREEMPT]	= DTL_LOG_PREEMPT,
+	[DTL_FAULT]	= DTL_LOG_FAULT,
+	[DTL_ALL]	= DTL_LOG_ALL,
+};
+
+struct vpa_dtl {
+	struct dtl_entry	*buf;
+	u64			last_idx;
+	bool			active_lock;
+};
+
+static DEFINE_PER_CPU(struct vpa_dtl, vpa_cpu_dtl);
+
+/* Reference count of active dtl event users across all CPUs */
+static int dtl_global_refc;
+static DEFINE_SPINLOCK(dtl_global_lock);
+
+/*
+ * Parse the dispatch trace log entries accumulated since the last read
+ * and emit them to perf as a single raw sample.
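+ *
+ * The raw sample payload, as built below, is laid out as:
+ *   int                version of the dtl sampling format (currently 1)
+ *   unsigned long      tb_ticks_per_sec, to convert timebase ticks to seconds
+ *   long               n_req, the number of dtl entries that follow
+ *   struct dtl_entry   entries[n_req]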
+ */
+static void vpa_dtl_dump_sample_data(struct perf_event *event)
+{
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	u64 cur_idx, last_idx, i;
+	char *buf;
+
+	/* actual number of entries read */
+	long n_read = 0, read_size = 0;
+
+	/* number of entries added to the dtl buffer */
+	long n_req;
+
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, event->cpu);
+	int version = 1;
+
+	/* Setup perf sample */
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	memset(&regs, 0, sizeof(regs));
+	memset(&raw, 0, sizeof(raw));
+
+	cur_idx = be64_to_cpu(lppaca_of(event->cpu).dtl_idx);
+	last_idx = dtl->last_idx;
+
+	/* The hypervisor has wrapped our buffer; skip the overwritten entries */
+	if (last_idx + N_DISPATCH_LOG <= cur_idx)
+		last_idx = cur_idx - N_DISPATCH_LOG + 1;
+
+	n_req = cur_idx - last_idx;
+
+	/* no new entries were added to the buffer, return */
+	if (n_req <= 0)
+		return;
+
+	dtl->last_idx = last_idx + n_req;
+
+	/* May be called from the hrtimer handler, so we must not sleep */
+	buf = kzalloc((n_req * sizeof(struct dtl_entry)) + sizeof(version) +
+			sizeof(tb_ticks_per_sec) + sizeof(n_req), GFP_ATOMIC);
+	if (!buf)
+		return;
+
+	raw.frag.data = buf;
+
+	/* Save current version of dtl sampling support */
+	memcpy(buf, &version, sizeof(version));
+	buf += sizeof(version);
+
+	/* Save tb_ticks_per_sec to convert timebase ticks to seconds */
+	memcpy(buf, &tb_ticks_per_sec, sizeof(tb_ticks_per_sec));
+	buf += sizeof(tb_ticks_per_sec);
+
+	/* Save total number of dtl entries added to the dtl buffer */
+	memcpy(buf, &n_req, sizeof(n_req));
+	buf += sizeof(n_req);
+
+	i = last_idx % N_DISPATCH_LOG;
+
+	/* read the tail of the buffer if we've wrapped */
+	if (i + n_req > N_DISPATCH_LOG) {
+		read_size = N_DISPATCH_LOG - i;
+		memcpy(buf, &dtl->buf[i], read_size * sizeof(struct dtl_entry));
+		i = 0;
+		n_req -= read_size;
+		n_read += read_size;
+		buf += read_size * sizeof(struct dtl_entry);
+	}
+
+	/* .. and now the head */
+	memcpy(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry));
+	n_read += n_req;
+
+	raw.frag.size = n_read * sizeof(struct dtl_entry) + sizeof(version) +
+			sizeof(tb_ticks_per_sec) + sizeof(n_req);
+
+	perf_sample_save_raw_data(&data, &raw);
+	perf_event_overflow(event, &data, &regs);
+
+	/* The raw data has been copied into the ring buffer, drop our copy */
+	kfree(raw.frag.data);
+}
+
+/*
+ * The VPA dispatch trace log counters do not interrupt on overflow.
+ * The kernel therefore polls the log from an hrtimer so that entries
+ * are not lost. The polling interval is taken from the sample_period
+ * supplied by the user, interpreted in nanoseconds, and is clamped to
+ * a minimum of 10000 (10us).
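+ *
+ * The timer is started in HRTIMER_MODE_REL_PINNED mode so that it stays
+ * on the event's CPU, matching the per-cpu dispatch log it polls.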
+ */
+static enum hrtimer_restart vpa_dtl_hrtimer_handle(struct hrtimer *hrtimer)
+{
+	struct perf_event *event;
+	u64 period;
+
+	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
+
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return HRTIMER_NORESTART;
+
+	vpa_dtl_dump_sample_data(event);
+	period = max_t(u64, 10000, event->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+	return HRTIMER_RESTART;
+}
+
+static void vpa_dtl_start_hrtimer(struct perf_event *event)
+{
+	u64 period;
+	struct hw_perf_event *hwc = &event->hw;
+
+	period = max_t(u64, 10000, hwc->sample_period);
+	hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), HRTIMER_MODE_REL_PINNED);
+}
+
+static void vpa_dtl_stop_hrtimer(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hrtimer_cancel(&hwc->hrtimer);
+}
+
+static void vpa_dtl_reset_global_refc(struct perf_event *event)
+{
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, event->cpu);
+
+	/*
+	 * Drop this event's reference and release dtl_access_lock when
+	 * the last user goes away. The reference may already have been
+	 * dropped by vpa_dtl_cpu_offline(), hence the active_lock check.
+	 */
+	spin_lock(&dtl_global_lock);
+	if (dtl_global_refc > 0 && dtl->active_lock) {
+		dtl->active_lock = false;
+		if (--dtl_global_refc == 0)
+			write_unlock(&dtl_access_lock);
+	}
+	spin_unlock(&dtl_global_lock);
+}
+
+static int vpa_dtl_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct vpa_dtl *dtl;
+
+	/* test the event attr type for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Only per-cpu events are supported */
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* Counting events are not supported */
+	if (!is_sampling_event(event))
+		return -EOPNOTSUPP;
+
+	/* The log entries are exposed only through the raw sample data */
+	if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
+		return -EOPNOTSUPP;
+
+	/* Invalid event code */
+	if (event->attr.config < DTL_CEDE || event->attr.config > DTL_ALL)
+		return -EINVAL;
+
+	/* Ensure there are no other conflicting dtl users */
+	spin_lock(&dtl_global_lock);
+	if (dtl_global_refc == 0 && !write_trylock(&dtl_access_lock)) {
+		spin_unlock(&dtl_global_lock);
+		return -EBUSY;
+	}
+	dtl = &per_cpu(vpa_cpu_dtl, event->cpu);
+	dtl_global_refc++;
+	dtl->active_lock = true;
+	spin_unlock(&dtl_global_lock);
+
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = vpa_dtl_hrtimer_handle;
+
+	event->destroy = vpa_dtl_reset_global_refc;
+	return 0;
+}
+
+static int vpa_dtl_event_add(struct perf_event *event, int flags)
+{
+	int ret, hwcpu;
+	unsigned long addr;
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, event->cpu);
+
+	/*
+	 * Register our dtl buffer with the hypervisor. The
+	 * HV expects the buffer size to be passed in the second
+	 * word of the buffer.
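+	 *
+	 * This mirrors the buffer registration done by the existing
+	 * dtl debugfs interface in arch/powerpc/platforms/pseries/dtl.c.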
+	 */
+	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
+	dtl->last_idx = 0;
+
+	hwcpu = get_hard_smp_processor_id(event->cpu);
+	addr = __pa(dtl->buf);
+
+	ret = register_dtl(hwcpu, addr);
+	if (ret) {
+		pr_warn("DTL registration for cpu %d (hw %d) failed with %d\n",
+			event->cpu, hwcpu, ret);
+		return ret;
+	}
+
+	/* set our initial buffer indices */
+	lppaca_of(event->cpu).dtl_idx = 0;
+
+	/*
+	 * Ensure that our updates to the lppaca fields have
+	 * occurred before we actually enable the logging
+	 */
+	smp_wmb();
+
+	/* enable event logging */
+	lppaca_of(event->cpu).dtl_enable_mask = vpa_dtl_enable_mask[event->attr.config];
+
+	vpa_dtl_start_hrtimer(event);
+
+	return 0;
+}
+
+static void vpa_dtl_event_del(struct perf_event *event, int flags)
+{
+	int hwcpu = get_hard_smp_processor_id(event->cpu);
+
+	/* Flush any entries still in the log before disabling it */
+	vpa_dtl_dump_sample_data(event);
+	vpa_dtl_stop_hrtimer(event);
+	unregister_dtl(hwcpu);
+	lppaca_of(event->cpu).dtl_enable_mask = 0x0;
+}
+
+static void vpa_dtl_event_read(struct perf_event *event)
+{
+	/*
+	 * Nothing to do here: vpa_dtl_dump_sample_data() parses the
+	 * dispatch trace log and emits it as perf raw samples, from
+	 * the hrtimer handler and from vpa_dtl_event_del().
+	 */
+}
+
+/* Allocate dtl buffer memory for given cpu. */
+static int vpa_dtl_mem_alloc(int cpu)
+{
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, cpu);
+	struct dtl_entry *buf = NULL;
+
+	dtl->active_lock = false;
+
+	/* Reuse the buffer if this cpu was offlined and brought back */
+	if (dtl->buf)
+		return 0;
+
+	/* Check for dispatch trace log buffer cache */
+	if (!dtl_cache)
+		return -ENOMEM;
+
+	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(cpu));
+	if (!buf) {
+		pr_warn("buffer allocation failed for cpu %d\n", cpu);
+		return -ENOMEM;
+	}
+	dtl->buf = buf;
+	return 0;
+}
+
+static int vpa_dtl_cpu_online(unsigned int cpu)
+{
+	return vpa_dtl_mem_alloc(cpu);
+}
+
+static int vpa_dtl_cpu_offline(unsigned int cpu)
+{
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, cpu);
+
+	/* Drop the reference taken by a dtl event running on this cpu */
+	spin_lock(&dtl_global_lock);
+	if (dtl_global_refc > 0 && dtl->active_lock) {
+		dtl->active_lock = false;
+		if (--dtl_global_refc == 0)
+			write_unlock(&dtl_access_lock);
+	}
+	spin_unlock(&dtl_global_lock);
+	return 0;
+}
+
+static int vpa_dtl_cpu_hotplug_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_VPA_DTL_ONLINE,
+				 "perf/powerpc/vpa_pmu:online",
+				 vpa_dtl_cpu_online,
+				 vpa_dtl_cpu_offline);
+}
+
+static void vpa_dtl_clear_memory(void)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, i);
+
+		if (dtl->buf) {
+			kmem_cache_free(dtl_cache, dtl->buf);
+			dtl->buf = NULL;
+		}
+	}
+}
+
+static struct pmu vpa_dtl_pmu = {
+	.task_ctx_nr	= perf_invalid_context,
+
+	.name		= "vpa_dtl",
+	.attr_groups	= attr_groups,
+	.event_init	= vpa_dtl_event_init,
+	.add		= vpa_dtl_event_add,
+	.del		= vpa_dtl_event_del,
+	.read		= vpa_dtl_event_read,
+	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_EXCLUSIVE,
+};
+
+static int vpa_pmu_init(void)
+{
+	int r;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		pr_debug("not a shared virtualized system, not enabling\n");
+		return -ENODEV;
+	}
+
+	/* init cpuhotplug */
+	r = vpa_dtl_cpu_hotplug_init();
+	if (r) {
+		vpa_dtl_clear_memory();
+		return r;
+	}
+
+	r = perf_pmu_register(&vpa_dtl_pmu, vpa_dtl_pmu.name, -1);
+	if (r) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_VPA_DTL_ONLINE);
+		vpa_dtl_clear_memory();
+		return r;
+	}
+
+	return 0;
+}
+
+device_initcall(vpa_pmu_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7a5785f405b62..c335af5a5a8ee 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -232,6 +232,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
 	CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
+	CPUHP_AP_PERF_POWERPC_VPA_DTL_ONLINE,
 	CPUHP_AP_PERF_CSKY_ONLINE,
 	CPUHP_AP_TMIGR_ONLINE,
 	CPUHP_AP_WATCHDOG_ONLINE,
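
Illustrative usage, not part of the patch: assuming the PMU registers as
"vpa_dtl" with the event names defined above, the dispatch trace log can be
sampled per CPU with perf record's raw-sample mode. -R requests
PERF_SAMPLE_RAW, which vpa_dtl_event_init() requires, and the -c period is
the hrtimer polling interval in nanoseconds (floored at 10000):

    perf record -R -C 0 -e vpa_dtl/dtl_all/ -c 1000000000 -- sleep 5
    perf report -D        # dump the raw DTL entries from the sample payload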