diff --git a/arch/arm/boot/dts/bcm2708-rpi.dtsi b/arch/arm/boot/dts/bcm2708-rpi.dtsi index f88b844f0aff82..9abf2a05059c04 100644 --- a/arch/arm/boot/dts/bcm2708-rpi.dtsi +++ b/arch/arm/boot/dts/bcm2708-rpi.dtsi @@ -37,6 +37,7 @@ fb = &fb; vchiq = &vchiq; thermal = &thermal; + axiperf = &axiperf; }; leds: leds { @@ -119,6 +120,7 @@ sd_force_pio = <&sdhost>,"brcm,force-pio?"; sd_pio_limit = <&sdhost>,"brcm,pio-limit:0"; sd_debug = <&sdhost>,"brcm,debug"; + axiperf = <&axiperf>,"status"; }; }; diff --git a/arch/arm/boot/dts/bcm270x.dtsi b/arch/arm/boot/dts/bcm270x.dtsi index d312c2739624ec..6bb6fd2b16b661 100644 --- a/arch/arm/boot/dts/bcm270x.dtsi +++ b/arch/arm/boot/dts/bcm270x.dtsi @@ -130,6 +130,14 @@ /* Add alias */ status = "disabled"; }; + + axiperf: axiperf { + compatible = "brcm,bcm2835-axiperf"; + reg = <0x7e009800 0x100>, + <0x7ee08000 0x100>; + firmware = <&firmware>; + status = "disabled"; + }; }; vdd_5v0_reg: fixedregulator_5v0 { diff --git a/arch/arm/configs/bcm2709_defconfig b/arch/arm/configs/bcm2709_defconfig index b520a6ccb97165..8e22a7836106fc 100644 --- a/arch/arm/configs/bcm2709_defconfig +++ b/arch/arm/configs/bcm2709_defconfig @@ -1179,6 +1179,7 @@ CONFIG_INV_MPU6050_I2C=m CONFIG_BMP280=m CONFIG_PWM_BCM2835=m CONFIG_PWM_PCA9685=m +CONFIG_RPI_AXIPERF=m CONFIG_RASPBERRYPI_FIRMWARE=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y diff --git a/arch/arm/configs/bcmrpi_defconfig b/arch/arm/configs/bcmrpi_defconfig index e85544974de7de..b2ffab8b160f59 100644 --- a/arch/arm/configs/bcmrpi_defconfig +++ b/arch/arm/configs/bcmrpi_defconfig @@ -1185,6 +1185,7 @@ CONFIG_INV_MPU6050_I2C=m CONFIG_BMP280=m CONFIG_PWM_BCM2835=m CONFIG_PWM_PCA9685=m +CONFIG_RPI_AXIPERF=m CONFIG_RASPBERRYPI_FIRMWARE=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e5197ffb74229e..736c22e00e8f84 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -43,4 +43,11 @@ config XGENE_PMU help Say y if you want to use APM X-Gene SoC performance monitors. +config RPI_AXIPERF + depends on ARCH_BCM2835 + tristate "RaspberryPi AXI Performance monitors" + default n + help + Say y if you want to use Raspberry Pi AXI performance monitors, m if + you want to build it as a module. endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 6420bd4394d5e4..fa0f76a2947c5e 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o +obj-$(CONFIG_RPI_AXIPERF) += raspberrypi_axi_monitor.o diff --git a/drivers/perf/raspberrypi_axi_monitor.c b/drivers/perf/raspberrypi_axi_monitor.c new file mode 100644 index 00000000000000..bafe03b6c00006 --- /dev/null +++ b/drivers/perf/raspberrypi_axi_monitor.c @@ -0,0 +1,637 @@ +/* + * raspberrypi_axi_monitor.c + * + * Author: james.hughes@raspberrypi.org + * + * Raspberry Pi AXI performance counters. + * + * Copyright (C) 2017 Raspberry Pi Trading Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NUM_MONITORS 2 +#define NUM_BUS_WATCHERS_PER_MONITOR 3 + +#define SYSTEM_MONITOR 0 +#define VPU_MONITOR 1 + +#define MAX_BUSES 16 +#define DEFAULT_SAMPLE_TIME 100 + +#define NUM_BUS_WATCHER_RESULTS 9 + +struct bus_watcher_data { + union { + u32 results[NUM_BUS_WATCHER_RESULTS]; + struct { + u32 atrans; + u32 atwait; + u32 amax; + u32 wtrans; + u32 wtwait; + u32 wmax; + u32 rtrans; + u32 rtwait; + u32 rmax; + }; + }; +}; + + +struct rpi_axiperf { + struct platform_device *dev; + struct dentry *root_folder; + + struct task_struct *monitor_thread; + struct mutex lock; + + struct rpi_firmware *firmware; + + /* Sample time spent on for each bus */ + int sample_time; + + /* Now storage for the per monitor settings and the resulting + * performance figures + */ + struct { + /* Bit field of buses we want to monitor */ + int bus_enabled; + /* Bit field of buses to filter by */ + int bus_filter; + /* The current buses being monitored on this monitor */ + int current_bus[NUM_BUS_WATCHERS_PER_MONITOR]; + /* The last bus monitored on this monitor */ + int last_monitored; + + /* Set true if this mailbox must use the mailbox interface + * rather than access registers directly. + */ + int use_mailbox_interface; + + /* Current result values */ + struct bus_watcher_data results[MAX_BUSES]; + + struct dentry *debugfs_entry; + void __iomem *base_address; + + } monitor[NUM_MONITORS]; + +}; + +static struct rpi_axiperf *state; + +/* Two monitors, System and VPU, each with the following register sets. + * Each monitor can only monitor one bus at a time, so we time share them, + * giving each bus 100ms (default, settable via debugfs) of time on its + * associated monitor + * Record results from the three Bus watchers per monitor and push to the sysfs + */ + +/* general registers */ +const int GEN_CTRL; + +const int GEN_CTL_ENABLE_BIT = BIT(0); +const int GEN_CTL_RESET_BIT = BIT(1); + +/* Bus watcher registers */ +const int BW_PITCH = 0x40; + +const int BW0_CTRL = 0x40; +const int BW1_CTRL = 0x80; +const int BW2_CTRL = 0xc0; + +const int BW_ATRANS_OFFSET = 0x04; +const int BW_ATWAIT_OFFSET = 0x08; +const int BW_AMAX_OFFSET = 0x0c; +const int BW_WTRANS_OFFSET = 0x10; +const int BW_WTWAIT_OFFSET = 0x14; +const int BW_WMAX_OFFSET = 0x18; +const int BW_RTRANS_OFFSET = 0x1c; +const int BW_RTWAIT_OFFSET = 0x20; +const int BW_RMAX_OFFSET = 0x24; + +const int BW_CTRL_RESET_BIT = BIT(31); +const int BW_CTRL_ENABLE_BIT = BIT(30); +const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29); +const int BW_CTRL_LIMIT_HALT_BIT = BIT(28); + +const int BW_CTRL_SOURCE_SHIFT = 8; +const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits +const int BW_CTRL_BUS_WATCH_SHIFT; +const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits +const int BW_CTRL_BUS_FILTER_SHIFT = 8; + +const static char *bus_filter_strings[] = { + "", + "CORE0_V", + "ICACHE0", + "DCACHE0", + "CORE1_V", + "ICACHE1", + "DCACHE1", + "L2_MAIN", + "HOST_PORT", + "HOST_PORT2", + "HVS", + "ISP", + "VIDEO_DCT", + "VIDEO_SD2AXI", + "CAM0", + "CAM1", + "DMA0", + "DMA1", + "DMA2_VPU", + "JPEG", + "VIDEO_CME", + "TRANSPOSER", + "VIDEO_FME", + "CCP2TX", + "USB", + "V3D0", + "V3D1", + "V3D2", + "AVE", + "DEBUG", + "CPU", + "M30" +}; + +const int num_bus_filters = ARRAY_SIZE(bus_filter_strings); + +const static char *system_bus_string[] = { + "DMA_L2", + "TRANS", + "JPEG", + "SYSTEM_UC", + "DMA_UC", + "SYSTEM_L2", + "CCP2TX", + "MPHI_RX", + "MPHI_TX", + "HVS", + "H264", + "ISP", + "V3D", + "PERIPHERAL", + "CPU_UC", + "CPU_L2" +}; + +const int num_system_buses = ARRAY_SIZE(system_bus_string); + +const static char *vpu_bus_string[] = { + "VPU1_D_L2", + "VPU0_D_L2", + "VPU1_I_L2", + "VPU0_I_L2", + "SYSTEM_L2", + "L2_FLUSH", + "DMA_L2", + "VPU1_D_UC", + "VPU0_D_UC", + "VPU1_I_UC", + "VPU0_I_UC", + "SYSTEM_UC", + "L2_OUT", + "DMA_UC", + "SDRAM", + "L2_IN" +}; + +const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string); + +const static char *monitor_name[] = { + "System", + "VPU" +}; + +static inline void write_reg(int monitor, int reg, u32 value) +{ + writel(value, state->monitor[monitor].base_address + reg); +} + +static inline u32 read_reg(int monitor, u32 reg) +{ + return readl(state->monitor[monitor].base_address + reg); +} + +static void read_bus_watcher(int monitor, int watcher, u32 *results) +{ + if (state->monitor[monitor].use_mailbox_interface) { + /* We have 9 results, plus the overheads of start address and + * length So 11 u32 to define + */ + u32 tmp[11]; + int err; + + tmp[0] = (u32)(state->monitor[monitor].base_address + watcher + + BW_ATRANS_OFFSET); + tmp[1] = NUM_BUS_WATCHER_RESULTS; + + err = rpi_firmware_property(state->firmware, + RPI_FIRMWARE_GET_PERIPH_REG, + tmp, sizeof(tmp)); + + if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS) + dev_err_once(&state->dev->dev, + "Failed to read bus watcher"); + else + memcpy(results, &tmp[2], + NUM_BUS_WATCHER_RESULTS * sizeof(u32)); + } else { + int i; + void __iomem *addr = state->monitor[monitor].base_address + + watcher + BW_ATRANS_OFFSET; + for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4) + *results++ = readl(addr); + } +} + +static void set_monitor_control(int monitor, u32 set) +{ + if (state->monitor[monitor].use_mailbox_interface) { + u32 tmp[3] = {(u32)(state->monitor[monitor].base_address + + GEN_CTRL), 1, set}; + int err = rpi_firmware_property(state->firmware, + RPI_FIRMWARE_SET_PERIPH_REG, + tmp, sizeof(tmp)); + + if (err < 0 || tmp[1] != 1) + dev_err_once(&state->dev->dev, + "Failed to set monitor control"); + } else + write_reg(monitor, GEN_CTRL, set); +} + +static void set_bus_watcher_control(int monitor, int watcher, u32 set) +{ + if (state->monitor[monitor].use_mailbox_interface) { + u32 tmp[3] = {(u32)(state->monitor[monitor].base_address + + watcher), 1, set}; + int err = rpi_firmware_property(state->firmware, + RPI_FIRMWARE_SET_PERIPH_REG, + tmp, sizeof(tmp)); + if (err < 0 || tmp[1] != 1) + dev_err_once(&state->dev->dev, + "Failed to set bus watcher control"); + } else + write_reg(monitor, watcher, set); +} + +static void monitor(struct rpi_axiperf *state) +{ + int monitor, num_buses[NUM_MONITORS]; + + mutex_lock(&state->lock); + + for (monitor = 0; monitor < NUM_MONITORS; monitor++) { + typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); + + /* Anything enabled? */ + if (mon->bus_enabled == 0) { + /* No, disable all monitoring for this monitor */ + set_monitor_control(monitor, GEN_CTL_RESET_BIT); + } else { + int i; + + /* Find out how many busses we want to monitor, and + * spread our 3 actual monitors over them + */ + num_buses[monitor] = hweight32(mon->bus_enabled); + num_buses[monitor] = min(num_buses[monitor], + NUM_BUS_WATCHERS_PER_MONITOR); + + for (i = 0; i < num_buses[monitor]; i++) { + int bus_control; + + do { + mon->last_monitored++; + mon->last_monitored &= 0xf; + } while ((mon->bus_enabled & + (1 << mon->last_monitored)) == 0); + + mon->current_bus[i] = mon->last_monitored; + + /* Reset the counters */ + set_bus_watcher_control(monitor, + BW0_CTRL + + i*BW_PITCH, + BW_CTRL_RESET_BIT); + + bus_control = BW_CTRL_ENABLE_BIT | + mon->current_bus[i]; + + if (mon->bus_filter) { + bus_control |= + BW_CTRL_ENABLE_ID_FILTER_BIT; + bus_control |= + ((mon->bus_filter & 0x1f) + << BW_CTRL_BUS_FILTER_SHIFT); + } + + // Start capture + set_bus_watcher_control(monitor, + BW0_CTRL + i*BW_PITCH, + bus_control); + } + } + + /* start monitoring */ + set_monitor_control(monitor, GEN_CTL_ENABLE_BIT); + } + + mutex_unlock(&state->lock); + + msleep(state->sample_time); + + /* Now read the results */ + + mutex_lock(&state->lock); + for (monitor = 0; monitor < NUM_MONITORS; monitor++) { + typeof(state->monitor[0]) *mon = &(state->monitor[monitor]); + + /* Anything enabled? */ + if (mon->bus_enabled == 0) { + /* No, disable all monitoring for this monitor */ + set_monitor_control(monitor, 0); + } else { + int i; + + for (i = 0; i < num_buses[monitor]; i++) { + int bus = mon->current_bus[i]; + + read_bus_watcher(monitor, + BW0_CTRL + i*BW_PITCH, + (u32 *)&mon->results[bus].results); + } + } + } + mutex_unlock(&state->lock); +} + +static int monitor_thread(void *data) +{ + struct rpi_axiperf *state = data; + + while (1) { + monitor(state); + + if (kthread_should_stop()) + return 0; + } + return 0; +} + +static ssize_t myreader(struct file *fp, char __user *user_buffer, + size_t count, loff_t *position) +{ +#define INIT_BUFF_SIZE 2048 + + int i; + int idx = (int)(fp->private_data); + int num_buses, cnt; + char *string_buffer; + int buff_size = INIT_BUFF_SIZE; + char *p; + typeof(state->monitor[0]) *mon = &(state->monitor[idx]); + + if (idx < 0 || idx > NUM_MONITORS) + idx = 0; + + num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses; + + string_buffer = kmalloc(buff_size, GFP_KERNEL); + + if (!string_buffer) { + dev_err(&state->dev->dev, + "Failed temporary string allocation\n"); + return 0; + } + + p = string_buffer; + + mutex_lock(&state->lock); + + if (mon->bus_filter) { + int filt = min(mon->bus_filter & 0x1f, num_bus_filters); + + cnt = snprintf(p, buff_size, + "\nMonitoring transactions from %s only\n", + bus_filter_strings[filt]); + p += cnt; + buff_size -= cnt; + } + + cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n" + "======================================================================================================\n"); + + if (cnt >= buff_size) + goto done; + + p += cnt; + buff_size -= cnt; + + for (i = 0; i < num_buses; i++) { + if (mon->bus_enabled & (1 << i)) { +#define DIVIDER (1024) + typeof(mon->results[0]) *res = &(mon->results[i]); + + cnt = snprintf(p, buff_size, + "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n", + idx == SYSTEM_MONITOR ? + system_bus_string[i] : + vpu_bus_string[i], + res->atrans/DIVIDER, + res->atwait/DIVIDER, + res->amax/DIVIDER, + res->wtrans/DIVIDER, + res->wtwait/DIVIDER, + res->wmax/DIVIDER, + res->rtrans/DIVIDER, + res->rtwait/DIVIDER, + res->rmax/DIVIDER + ); + if (cnt >= buff_size) + goto done; + + p += cnt; + buff_size -= cnt; + } + } + + mutex_unlock(&state->lock); + +done: + + /* did the last string entry exceeed our buffer size? ie out of string + * buffer space. Null terminate, use what we have. + */ + if (cnt >= buff_size) { + buff_size = 0; + string_buffer[INIT_BUFF_SIZE] = 0; + } + + cnt = simple_read_from_buffer(user_buffer, count, position, + string_buffer, + INIT_BUFF_SIZE - buff_size); + + kfree(string_buffer); + + return cnt; +} + +static ssize_t mywriter(struct file *fp, const char __user *user_buffer, + size_t count, loff_t *position) +{ + int idx = (int)(fp->private_data); + + if (idx < 0 || idx > NUM_MONITORS) + idx = 0; + + /* At the moment, this does nothing, but in the future it could be + * used to reset counters etc + */ + return count; +} + +static const struct file_operations fops_debug = { + .read = myreader, + .write = mywriter, + .open = simple_open +}; + +static int rpi_axiperf_probe(struct platform_device *pdev) +{ + int ret = 0, i; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct device_node *fw_node; + + state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL); + if (!state) + return -ENOMEM; + + /* Get the firmware handle for future rpi-firmware-xxx calls */ + fw_node = of_parse_phandle(np, "firmware", 0); + if (!fw_node) { + dev_err(dev, "Missing firmware node\n"); + return -ENOENT; + } + + state->firmware = rpi_firmware_get(fw_node); + if (!state->firmware) + return -EPROBE_DEFER; + + /* Special case for the VPU monitor, we must use the mailbox interface + * as it is not accessible from the ARM address space. + */ + state->monitor[VPU_MONITOR].use_mailbox_interface = 1; + state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0; + + for (i = 0; i < NUM_MONITORS; i++) { + if (state->monitor[i].use_mailbox_interface) { + of_property_read_u32_index(np, "reg", i*2, + (u32 *)(&state->monitor[i].base_address)); + } else { + struct resource *resource = + platform_get_resource(pdev, IORESOURCE_MEM, i); + + state->monitor[i].base_address = + devm_ioremap_resource(&pdev->dev, resource); + } + + if (IS_ERR(state->monitor[i].base_address)) + return PTR_ERR(state->monitor[i].base_address); + + /* Enable all buses by default */ + state->monitor[i].bus_enabled = 0xffff; + } + + state->dev = pdev; + platform_set_drvdata(pdev, state); + + state->sample_time = DEFAULT_SAMPLE_TIME; + + /* Set up all the debugfs stuff */ + state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL); + + for (i = 0; i < NUM_MONITORS; i++) { + state->monitor[i].debugfs_entry = + debugfs_create_dir(monitor_name[i], state->root_folder); + if (IS_ERR(state->monitor[i].debugfs_entry)) + state->monitor[i].debugfs_entry = NULL; + + debugfs_create_file("data", 0444, + state->monitor[i].debugfs_entry, + (void *)i, &fops_debug); + debugfs_create_u32("enable", 0644, + state->monitor[i].debugfs_entry, + &state->monitor[i].bus_enabled); + debugfs_create_u32("filter", 0644, + state->monitor[i].debugfs_entry, + &state->monitor[i].bus_filter); + debugfs_create_u32("sample_time", 0644, + state->monitor[i].debugfs_entry, + &state->sample_time); + } + + mutex_init(&state->lock); + + state->monitor_thread = kthread_run(monitor_thread, state, + "rpi-axiperfmon"); + + return ret; + +} + +static int rpi_axiperf_remove(struct platform_device *dev) +{ + int ret = 0; + + kthread_stop(state->monitor_thread); + + debugfs_remove_recursive(state->root_folder); + state->root_folder = NULL; + + return ret; +} + +static const struct of_device_id rpi_axiperf_match[] = { + { + .compatible = "brcm,bcm2835-axiperf", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, rpi_axiperf_match); + +static struct platform_driver rpi_axiperf_driver = { + .probe = rpi_axiperf_probe, + .remove = rpi_axiperf_remove, + .driver = { + .name = "rpi-bcm2835-axiperf", + .of_match_table = of_match_ptr(rpi_axiperf_match), + }, +}; + +module_platform_driver(rpi_axiperf_driver); + +/* Module information */ +MODULE_AUTHOR("James Hughes "); +MODULE_DESCRIPTION("RPI AXI Performance monitor driver"); +MODULE_LICENSE("GPL"); + diff --git a/include/soc/bcm2835/raspberrypi-firmware.h b/include/soc/bcm2835/raspberrypi-firmware.h index dc7fd58afd5ddd..cbd1674570433f 100644 --- a/include/soc/bcm2835/raspberrypi-firmware.h +++ b/include/soc/bcm2835/raspberrypi-firmware.h @@ -88,6 +88,9 @@ enum rpi_firmware_property_tag { RPI_FIRMWARE_SET_SDHOST_CLOCK = 0x00038042, RPI_FIRMWARE_GET_GPIO_CONFIG = 0x00030043, RPI_FIRMWARE_SET_GPIO_CONFIG = 0x00038043, + RPI_FIRMWARE_GET_PERIPH_REG = 0x00030045, + RPI_FIRMWARE_SET_PERIPH_REG = 0x00038045, + /* Dispmanx TAGS */ RPI_FIRMWARE_FRAMEBUFFER_ALLOCATE = 0x00040001,