diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 4f53d0b97539b..7118b5c0c6419 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_PPC_POWERNV)		+= imc-pmu.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
-obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
+obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o vpa-pmu.o
 
 obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
 
diff --git a/arch/powerpc/perf/vpa-pmu.c b/arch/powerpc/perf/vpa-pmu.c
new file mode 100644
index 0000000000000..e5a027f4313b2
--- /dev/null
+++ b/arch/powerpc/perf/vpa-pmu.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Perf interface to expose Virtual Processor Area (VPA) counters.
+ *
+ * Copyright (C) 2024 Kajol Jain, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "vpa-pmu: " fmt
+
+#include <linux/perf_event.h>
+#include <asm/dtl.h>
+#include <asm/smp.h>
+
+#define EVENT(_name, _code)	enum{_name = _code}
+
+/*
+ * Dispatch Trace Log (DTL) event codes.
+ */
+EVENT(DTL_CEDE,		0x1);
+EVENT(DTL_PREEMPT,	0x2);
+EVENT(DTL_FAULT,	0x3);
+EVENT(DTL_ALL,		0x4);
+
+GENERIC_EVENT_ATTR(dtl_cede, DTL_CEDE);
+GENERIC_EVENT_ATTR(dtl_preempt, DTL_PREEMPT);
+GENERIC_EVENT_ATTR(dtl_fault, DTL_FAULT);
+GENERIC_EVENT_ATTR(dtl_all, DTL_ALL);
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *events_attr[] = {
+	GENERIC_EVENT_PTR(DTL_CEDE),
+	GENERIC_EVENT_PTR(DTL_PREEMPT),
+	GENERIC_EVENT_PTR(DTL_FAULT),
+	GENERIC_EVENT_PTR(DTL_ALL),
+	NULL
+};
+
+static const struct attribute_group event_group = {
+	.name = "events",
+	.attrs = events_attr,
+};
+
+static struct attribute *format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static const struct attribute_group format_group = {
+	.name = "format",
+	.attrs = format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&format_group,
+	&event_group,
+	NULL,
+};
+
+/*
+ * Dispatch trace log event enable masks, defined in asm/dtl.h:
+ *   DTL_LOG_CEDE:    voluntary virtual processor waits
+ *   DTL_LOG_PREEMPT: time-slice preempts
+ *   DTL_LOG_FAULT:   virtual partition memory page faults
+ *   DTL_LOG_ALL:     DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT
+ */
+static const u8 vpa_dtl_enable_mask[] = {
+	[DTL_CEDE]	= DTL_LOG_CEDE,
+	[DTL_PREEMPT]	= DTL_LOG_PREEMPT,
+	[DTL_FAULT]	= DTL_LOG_FAULT,
+	[DTL_ALL]	= DTL_LOG_ALL,
+};
+
+struct vpa_dtl {
+	struct dtl_entry	*buf;
+	u64			last_idx;
+	bool			active_lock;
+};
+
+static DEFINE_PER_CPU(struct vpa_dtl, vpa_cpu_dtl);
+
+/* Reference count of active dtl event users across all CPUs */
+static int dtl_global_refc;
+static DEFINE_SPINLOCK(dtl_global_lock);
+
+/*
+ * Parse the dispatch trace log entries accumulated since the last read
+ * and emit them to perf as a single raw sample.
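+ *
+ * The raw sample payload, as built below, is laid out as:
+ *   int                version of the dtl sampling format (currently 1)
+ *   unsigned long      tb_ticks_per_sec, to convert timebase ticks to seconds
+ *   long               n_req, the number of dtl entries that follow
+ *   struct dtl_entry   entries[n_req]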
+ */
+static void vpa_dtl_dump_sample_data(struct perf_event *event)
+{
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	u64 cur_idx, last_idx, i;
+	char *buf;
+
+	/* actual number of entries read */
+	long n_read = 0, read_size = 0;
+
+	/* number of entries added to the dtl buffer */
+	long n_req;
+
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, event->cpu);
+	int version = 1;
+
+	/* Setup perf sample */
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	memset(&regs, 0, sizeof(regs));
+	memset(&raw, 0, sizeof(raw));
+
+	cur_idx = be64_to_cpu(lppaca_of(event->cpu).dtl_idx);
+	last_idx = dtl->last_idx;
+
+	/* The hypervisor has wrapped our buffer; skip the overwritten entries */
+	if (last_idx + N_DISPATCH_LOG <= cur_idx)
+		last_idx = cur_idx - N_DISPATCH_LOG + 1;
+
+	n_req = cur_idx - last_idx;
+
+	/* no new entries were added to the buffer, return */
+	if (n_req <= 0)
+		return;
+
+	dtl->last_idx = last_idx + n_req;
+
+	/* May be called from the hrtimer handler, so we must not sleep */
+	buf = kzalloc((n_req * sizeof(struct dtl_entry)) + sizeof(version) +
+			sizeof(tb_ticks_per_sec) + sizeof(n_req), GFP_ATOMIC);
+	if (!buf)
+		return;
+
+	raw.frag.data = buf;
+
+	/* Save current version of dtl sampling support */
+	memcpy(buf, &version, sizeof(version));
+	buf += sizeof(version);
+
+	/* Save tb_ticks_per_sec to convert timebase ticks to seconds */
+	memcpy(buf, &tb_ticks_per_sec, sizeof(tb_ticks_per_sec));
+	buf += sizeof(tb_ticks_per_sec);
+
+	/* Save total number of dtl entries added to the dtl buffer */
+	memcpy(buf, &n_req, sizeof(n_req));
+	buf += sizeof(n_req);
+
+	i = last_idx % N_DISPATCH_LOG;
+
+	/* read the tail of the buffer if we've wrapped */
+	if (i + n_req > N_DISPATCH_LOG) {
+		read_size = N_DISPATCH_LOG - i;
+		memcpy(buf, &dtl->buf[i], read_size * sizeof(struct dtl_entry));
+		i = 0;
+		n_req -= read_size;
+		n_read += read_size;
+		buf += read_size * sizeof(struct dtl_entry);
+	}
+
+	/* .. and now the head */
+	memcpy(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry));
+	n_read += n_req;
+
+	raw.frag.size = n_read * sizeof(struct dtl_entry) + sizeof(version) +
+			sizeof(tb_ticks_per_sec) + sizeof(n_req);
+
+	perf_sample_save_raw_data(&data, &raw);
+	perf_event_overflow(event, &data, &regs);
+
+	/* The raw data has been copied into the ring buffer, drop our copy */
+	kfree(raw.frag.data);
+}
+
+/*
+ * The VPA dispatch trace log counters do not interrupt on overflow.
+ * The kernel therefore polls the log from an hrtimer so that entries
+ * are not lost. The polling interval is taken from the sample_period
+ * supplied by the user, interpreted in nanoseconds, and is clamped to
+ * a minimum of 10000 (10us).
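+ *
+ * The timer is started in HRTIMER_MODE_REL_PINNED mode so that it stays
+ * on the event's CPU, matching the per-cpu dispatch log it polls.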
+ */
+static enum hrtimer_restart vpa_dtl_hrtimer_handle(struct hrtimer *hrtimer)
+{
+	struct perf_event *event;
+	u64 period;
+
+	event = container_of(hrtimer, struct perf_event, hw.hrtimer);
+
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return HRTIMER_NORESTART;
+
+	vpa_dtl_dump_sample_data(event);
+	period = max_t(u64, 10000, event->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+	return HRTIMER_RESTART;
+}
+
+static void vpa_dtl_start_hrtimer(struct perf_event *event)
+{
+	u64 period;
+	struct hw_perf_event *hwc = &event->hw;
+
+	period = max_t(u64, 10000, hwc->sample_period);
+	hrtimer_start(&hwc->hrtimer, ns_to_ktime(period), HRTIMER_MODE_REL_PINNED);
+}
+
+static void vpa_dtl_stop_hrtimer(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hrtimer_cancel(&hwc->hrtimer);
+}
+
+static void vpa_dtl_reset_global_refc(struct perf_event *event)
+{
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, event->cpu);
+
+	/*
+	 * Drop this event's reference and release dtl_access_lock when
+	 * the last user goes away. The reference may already have been
+	 * dropped by vpa_dtl_cpu_offline(), hence the active_lock check.
+	 */
+	spin_lock(&dtl_global_lock);
+	if (dtl_global_refc > 0 && dtl->active_lock) {
+		dtl->active_lock = false;
+		if (--dtl_global_refc == 0)
+			write_unlock(&dtl_access_lock);
+	}
+	spin_unlock(&dtl_global_lock);
+}
+
+static int vpa_dtl_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct vpa_dtl *dtl;
+
+	/* test the event attr type for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Only per-cpu events are supported */
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* Counting events are not supported */
+	if (!is_sampling_event(event))
+		return -EOPNOTSUPP;
+
+	/* The log entries are exposed only through the raw sample data */
+	if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
+		return -EOPNOTSUPP;
+
+	/* Invalid event code */
+	if (event->attr.config < DTL_CEDE || event->attr.config > DTL_ALL)
+		return -EINVAL;
+
+	/* Ensure there are no other conflicting dtl users */
+	spin_lock(&dtl_global_lock);
+	if (dtl_global_refc == 0 && !write_trylock(&dtl_access_lock)) {
+		spin_unlock(&dtl_global_lock);
+		return -EBUSY;
+	}
+	dtl = &per_cpu(vpa_cpu_dtl, event->cpu);
+	dtl_global_refc++;
+	dtl->active_lock = true;
+	spin_unlock(&dtl_global_lock);
+
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = vpa_dtl_hrtimer_handle;
+
+	event->destroy = vpa_dtl_reset_global_refc;
+	return 0;
+}
+
+static int vpa_dtl_event_add(struct perf_event *event, int flags)
+{
+	int ret, hwcpu;
+	unsigned long addr;
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, event->cpu);
+
+	/*
+	 * Register our dtl buffer with the hypervisor. The
+	 * HV expects the buffer size to be passed in the second
+	 * word of the buffer.
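+	 *
+	 * This mirrors the buffer registration done by the existing
+	 * dtl debugfs interface in arch/powerpc/platforms/pseries/dtl.c.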
+	 */
+	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
+	dtl->last_idx = 0;
+
+	hwcpu = get_hard_smp_processor_id(event->cpu);
+	addr = __pa(dtl->buf);
+
+	ret = register_dtl(hwcpu, addr);
+	if (ret) {
+		pr_warn("DTL registration for cpu %d (hw %d) failed with %d\n",
+			event->cpu, hwcpu, ret);
+		return ret;
+	}
+
+	/* set our initial buffer indices */
+	lppaca_of(event->cpu).dtl_idx = 0;
+
+	/*
+	 * Ensure that our updates to the lppaca fields have
+	 * occurred before we actually enable the logging
+	 */
+	smp_wmb();
+
+	/* enable event logging */
+	lppaca_of(event->cpu).dtl_enable_mask = vpa_dtl_enable_mask[event->attr.config];
+
+	vpa_dtl_start_hrtimer(event);
+
+	return 0;
+}
+
+static void vpa_dtl_event_del(struct perf_event *event, int flags)
+{
+	int hwcpu = get_hard_smp_processor_id(event->cpu);
+
+	/* Flush any entries still in the log before disabling it */
+	vpa_dtl_dump_sample_data(event);
+	vpa_dtl_stop_hrtimer(event);
+	unregister_dtl(hwcpu);
+	lppaca_of(event->cpu).dtl_enable_mask = 0x0;
+}
+
+static void vpa_dtl_event_read(struct perf_event *event)
+{
+	/*
+	 * Nothing to do here: vpa_dtl_dump_sample_data() parses the
+	 * dispatch trace log and emits it as perf raw samples, from
+	 * the hrtimer handler and from vpa_dtl_event_del().
+	 */
+}
+
+/* Allocate dtl buffer memory for given cpu. */
+static int vpa_dtl_mem_alloc(int cpu)
+{
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, cpu);
+	struct dtl_entry *buf = NULL;
+
+	dtl->active_lock = false;
+
+	/* Reuse the buffer if this cpu was offlined and brought back */
+	if (dtl->buf)
+		return 0;
+
+	/* Check for dispatch trace log buffer cache */
+	if (!dtl_cache)
+		return -ENOMEM;
+
+	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(cpu));
+	if (!buf) {
+		pr_warn("buffer allocation failed for cpu %d\n", cpu);
+		return -ENOMEM;
+	}
+	dtl->buf = buf;
+	return 0;
+}
+
+static int vpa_dtl_cpu_online(unsigned int cpu)
+{
+	return vpa_dtl_mem_alloc(cpu);
+}
+
+static int vpa_dtl_cpu_offline(unsigned int cpu)
+{
+	struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, cpu);
+
+	/* Drop the reference taken by a dtl event running on this cpu */
+	spin_lock(&dtl_global_lock);
+	if (dtl_global_refc > 0 && dtl->active_lock) {
+		dtl->active_lock = false;
+		if (--dtl_global_refc == 0)
+			write_unlock(&dtl_access_lock);
+	}
+	spin_unlock(&dtl_global_lock);
+	return 0;
+}
+
+static int vpa_dtl_cpu_hotplug_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_VPA_DTL_ONLINE,
+				 "perf/powerpc/vpa_pmu:online",
+				 vpa_dtl_cpu_online,
+				 vpa_dtl_cpu_offline);
+}
+
+static void vpa_dtl_clear_memory(void)
+{
+	int i;
+
+	for_each_online_cpu(i) {
+		struct vpa_dtl *dtl = &per_cpu(vpa_cpu_dtl, i);
+
+		if (dtl->buf) {
+			kmem_cache_free(dtl_cache, dtl->buf);
+			dtl->buf = NULL;
+		}
+	}
+}
+
+static struct pmu vpa_dtl_pmu = {
+	.task_ctx_nr	= perf_invalid_context,
+
+	.name		= "vpa_dtl",
+	.attr_groups	= attr_groups,
+	.event_init	= vpa_dtl_event_init,
+	.add		= vpa_dtl_event_add,
+	.del		= vpa_dtl_event_del,
+	.read		= vpa_dtl_event_read,
+	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_EXCLUSIVE,
+};
+
+static int vpa_pmu_init(void)
+{
+	int r;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		pr_debug("not a shared virtualized system, not enabling\n");
+		return -ENODEV;
+	}
+
+	/* init cpuhotplug */
+	r = vpa_dtl_cpu_hotplug_init();
+	if (r) {
+		vpa_dtl_clear_memory();
+		return r;
+	}
+
+	r = perf_pmu_register(&vpa_dtl_pmu, vpa_dtl_pmu.name, -1);
+	if (r) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_VPA_DTL_ONLINE);
+		vpa_dtl_clear_memory();
+		return r;
+	}
+
+	return 0;
+}
+
+device_initcall(vpa_pmu_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7a5785f405b62..c335af5a5a8ee 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -232,6 +232,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
 	CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
+	CPUHP_AP_PERF_POWERPC_VPA_DTL_ONLINE,
 	CPUHP_AP_PERF_CSKY_ONLINE,
 	CPUHP_AP_TMIGR_ONLINE,
 	CPUHP_AP_WATCHDOG_ONLINE,
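
Illustrative usage, not part of the patch: assuming the PMU registers as
"vpa_dtl" with the event names defined above, the dispatch trace log can be
sampled per CPU with perf record's raw-sample mode. -R requests
PERF_SAMPLE_RAW, which vpa_dtl_event_init() requires, and the -c period is
the hrtimer polling interval in nanoseconds (floored at 10000):

    perf record -R -C 0 -e vpa_dtl/dtl_all/ -c 1000000000 -- sleep 5
    perf report -D        # dump the raw DTL entries from the sample payload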