diff --git a/includes/bpf_helpers.h b/includes/bpf_helpers.h
index 8da81fec..ead9c51d 100644
--- a/includes/bpf_helpers.h
+++ b/includes/bpf_helpers.h
@@ -191,4 +191,35 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
                 (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 #endif
 
+/*
+ * TP_DATA_LOC_READ_* copy the payload of a tracepoint field that is declared
+ * `__data_loc`.
+ *
+ * FYI, a `__data_loc` variable is really an int that does not hold the data
+ * itself: its low 16 bits are the payload's byte offset from `_arg` and its
+ * high 16 bits are the payload's length. these macros decode that int and
+ * then bpf_probe_read() the payload into `_dst`.
+ *
+ *   _dst       buffer the payload is copied into.
+ *   _arg       pointer the encoded offset is relative to.
+ *   _data_loc  the `__data_loc` int.
+ *   _length    (CONST variant only) number of bytes to copy, used in place of
+ *              the length encoded in `_data_loc`.
+ *
+ * every argument is parenthesised in the expansion so that compound
+ * expressions (`a | b`, `ptr + 1`, ...) keep their meaning.
+ *
+ * this code is from iovisor/bcc file src/cc/exports/helpers.h and modified by
+ * Netdata's Agent team for inclusion in Netdata.
+ */
+#define TP_DATA_LOC_READ_CONST(_dst, _arg, _data_loc, _length) do { \
+    unsigned short __offset = (_data_loc) & 0xFFFF; \
+    bpf_probe_read((void *)(_dst), (_length), (char *)(_arg) + __offset); \
+} while (0)
+#define TP_DATA_LOC_READ(_dst, _arg, _data_loc) do { \
+    unsigned short __offset = (_data_loc) & 0xFFFF; \
+    unsigned short __length = (unsigned int)(_data_loc) >> 16; \
+    bpf_probe_read((void *)(_dst), __length, (char *)(_arg) + __offset); \
+} while (0)
+
 #endif
diff --git a/includes/netdata_ebpf.h b/includes/netdata_ebpf.h
index 2077d5aa..24c5a268 100644
--- a/includes/netdata_ebpf.h
+++ b/includes/netdata_ebpf.h
@@ -17,6 +17,7 @@ This header has the common definitions for all `.c` files.
 #include "netdata_disk.h"
 #include "netdata_fd.h"
 #include "netdata_fs.h"
+#include "netdata_hardirq.h"
 #include "netdata_mount.h"
 #include "netdata_network.h"
 #include "netdata_process.h"
diff --git a/includes/netdata_hardirq.h b/includes/netdata_hardirq.h
new file mode 100644
index 00000000..be272332
--- /dev/null
+++ b/includes/netdata_hardirq.h
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef _NETDATA_HARDIRQ_H_
+#define _NETDATA_HARDIRQ_H_ 1
+
+// number of entries the per-IRQ table is sized for.
+#define NETDATA_HARDIRQ_MAX_IRQS 1024L
+// size, in bytes, of the buffer an IRQ's name is stored in.
+#define NETDATA_HARDIRQ_NAME_LEN 32
+
+/*
+ * record of the `irq_handler_entry` tracepoint, described under
+ * /sys/kernel/debug/tracing/events/irq/irq_handler_entry/
+ */
+struct netdata_irq_handler_entry {
+    // This is not used with eBPF
+    u64 pad;
+    // offset:8; size:4; signed:1;
+    int irq;
+    // offset:12; size:4; signed:1;
+    // a `__data_loc` value rather than the name itself, see
+    // https://github.com/iovisor/bpftrace/issues/385 and
+    // https://lists.linuxfoundation.org/pipermail/iovisor-dev/2017-February/000627.html
+    int data_loc_name;
+};
+
+/*
+ * record of the `irq_handler_exit` tracepoint, described under
+ * /sys/kernel/debug/tracing/events/irq/irq_handler_exit/
+ */
+struct netdata_irq_handler_exit {
+    // This is not used with eBPF
+    u64 pad;
+    // offset:8; size:4; signed:1;
+    int irq;
+    // offset:12; size:4; signed:1;
+    int ret;
+};
+
+// key of the per-IRQ table: the IRQ number.
+typedef struct hardirq_key {
+    int irq;
+} hardirq_key_t;
+
+// value of the per-IRQ table.
+typedef struct hardirq_val {
+    // running total of the latency measured so far.
+    u64 latency;
+
+    // scratch timestamp: written by the IRQ entry handler and subtracted from
+    // the time seen by the IRQ exit handler to get the amount added to
+    // `latency`.
+    u64 ts;
+
+    // human-readable string identifying the IRQ.
+    char name[NETDATA_HARDIRQ_NAME_LEN];
+} hardirq_val_t;
+
+/************************************************************************************
+ * HARDIRQ STATIC
+ ***********************************************************************************/
+
+// every `irq_vectors` event other than the `vector_*` ones shares this
+// format. to list them:
+// cat /sys/kernel/debug/tracing/available_events | grep 'irq_vectors' | grep -v ':vector_'
+struct netdata_irq_vectors_entry {
+    // This is not used with eBPF
+    u64 pad;
+    // offset:8; size:4; signed:1;
+    int vector;
+};
+struct netdata_irq_vectors_exit {
+    // This is not used with eBPF
+    u64 pad;
+    // offset:8; size:4; signed:1;
+    int vector;
+};
+
+// static IRQs: unlike the IRQs above they are not given an IRQ ID, so each
+// one is tracked through its own pair of entry/exit tracepoints.
+enum netdata_hardirq_static {
+    NETDATA_HARDIRQ_STATIC_APIC_THERMAL,
+    NETDATA_HARDIRQ_STATIC_APIC_THRESHOLD,
+    NETDATA_HARDIRQ_STATIC_APIC_ERROR,
+    NETDATA_HARDIRQ_STATIC_APIC_DEFERRED_ERROR,
+    NETDATA_HARDIRQ_STATIC_APIC_SPURIOUS,
+    NETDATA_HARDIRQ_STATIC_FUNC_CALL,
+    NETDATA_HARDIRQ_STATIC_FUNC_CALL_SINGLE,
+    NETDATA_HARDIRQ_STATIC_RESCHEDULE,
+    NETDATA_HARDIRQ_STATIC_LOCAL_TIMER,
+    NETDATA_HARDIRQ_STATIC_IRQ_WORK,
+    NETDATA_HARDIRQ_STATIC_X86_PLATFORM_IPI,
+
+    // keep last: used as the count of the entries above.
+    NETDATA_HARDIRQ_STATIC_END
+};
+
+// value tracked for each static IRQ.
+typedef struct hardirq_static_val {
+    // running total of the latency measured so far.
+    u64 latency;
+
+    // scratch timestamp: written by the IRQ entry handler and subtracted from
+    // the time seen by the IRQ exit handler to get the amount added to
+    // `latency`.
+    u64 ts;
+} hardirq_static_val_t;
+
+#endif /* _NETDATA_HARDIRQ_H_ */
diff --git a/kernel/Makefile b/kernel/Makefile
index f96a0e05..650bc8b0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -40,6 +40,7 @@ NETDATA_APPS= btrfs \
               fd \
               fdatasync \
               fsync \
+              hardirq \
               mount \
               msync \
               nfs \
diff --git a/kernel/README.md b/kernel/README.md
index 91576038..f3f66825 100644
--- a/kernel/README.md
+++ b/kernel/README.md
@@ -16,21 +16,22 @@ of your distribution.
 
 Right now we have the following `eBPF` program collectors:
 
-- `cachestat_kern.c` : provides Linux page cache monitoring.
-- `dc_kern.c` : provides Linux directory cache monitoring.
-- `disk_kern.c` : provides disk latency monitoring.
-- `ext4_kern.c` : provides ext4 monitoring.
-- `fdatasync_kern.c` : monitor calls for syscall `fdatasync`.
-- `fsync_kern.c` : monitor calls for syscall `fsync`.
-- `mount_kern.c` : monitor calls for syscalls `mount` and `umount`.
-- `msync_kern.c` : monitor calls for syscall `msync`.
-- `nfs_kern.c` : provides nfs monitoring.
-- `process_kern.c` : provides process, file and VFS stats.
-- `socket_kern.c` : provides network stats;
-- `swap_kern.c` : provides swap stats;
-- `sync_file_range_kern.c`: monitor calls for syscall `sync_file_range`.
-- `sync_kern.c` : monitor calls for syscall `sync`.
-- `syncfs_kern.c` : monitor calls for syscall `syncfs`.
-- `vfs_kern.c` : monitor Virtual Filesystem functions.
-- `xfs_kern.c` : provides XFS monitoring.
-- `zfs_kern.c` : provides ZFS monitoring.
+- `cachestat_kern.c`      : provides Linux page cache monitoring.
+- `dc_kern.c`             : provides Linux directory cache monitoring.
+- `disk_kern.c`           : provides disk latency monitoring.
+- `ext4_kern.c`           : provides ext4 monitoring.
+- `fdatasync_kern.c`      : monitor calls for syscall `fdatasync`.
+- `fsync_kern.c`          : monitor calls for syscall `fsync`.
+- `hardirq_kern.c`        : provides hard interrupt (hard IRQ) latency monitoring.
+- `mount_kern.c`          : monitor calls for syscalls `mount` and `umount`.
+- `msync_kern.c`          : monitor calls for syscall `msync`.
+- `nfs_kern.c`            : provides nfs monitoring.
+- `process_kern.c`        : provides process, file and VFS stats.
+- `socket_kern.c`         : provides network stats.
+- `swap_kern.c`           : provides swap stats.
+- `sync_file_range_kern.c`: monitor calls for syscall `sync_file_range`.
+- `sync_kern.c`           : monitor calls for syscall `sync`.
+- `syncfs_kern.c`         : monitor calls for syscall `syncfs`.
+- `vfs_kern.c`            : monitor Virtual Filesystem functions.
+- `xfs_kern.c`            : provides XFS monitoring.
+- `zfs_kern.c`            : provides ZFS monitoring.
diff --git a/kernel/hardirq_kern.c b/kernel/hardirq_kern.c
new file mode 100644
index 00000000..7c9b9c28
--- /dev/null
+++ b/kernel/hardirq_kern.c
@@ -0,0 +1,260 @@
+#define KBUILD_MODNAME "hardirq_netdata"
+// NOTE(review): the three system header names were lost from the reviewed
+// copy of this patch. the ones below are what this file and bpf_helpers.h
+// visibly rely on (BPF map types and flags, PT_REGS_*, LINUX_VERSION_CODE);
+// confirm them against the upstream file.
+#include <linux/bpf.h>
+#include <linux/ptrace.h>
+#include <linux/version.h>
+
+#include "bpf_helpers.h"
+#include "netdata_ebpf.h"
+
+/************************************************************************************
+ * MAPS
+ ***********************************************************************************/
+
+// per-IRQ state keyed by IRQ number. kernels older than 4.15 get a plain hash
+// table, newer ones a per-CPU hash table.
+struct bpf_map_def SEC("maps") tbl_hardirq = {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0))
+    .type = BPF_MAP_TYPE_HASH,
+#else
+    .type = BPF_MAP_TYPE_PERCPU_HASH,
+#endif
+    .key_size = sizeof(hardirq_key_t),
+    .value_size = sizeof(hardirq_val_t),
+    .max_entries = NETDATA_HARDIRQ_MAX_IRQS
+};
+
+// maps from enum index to latency.
+struct bpf_map_def SEC("maps") tbl_hardirq_static = {
+    .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+    .key_size = sizeof(__u32),
+    .value_size = sizeof(hardirq_static_val_t),
+    .max_entries = NETDATA_HARDIRQ_STATIC_END
+};
+
+/************************************************************************************
+ * HARDIRQ SECTION
+ ***********************************************************************************/
+
+// IRQ handler entry: stamp the IRQ's value with the current time, creating
+// the value (with the IRQ's name) the first time the IRQ is seen.
+SEC("tracepoint/irq/irq_handler_entry")
+int netdata_irq_handler_entry(struct netdata_irq_handler_entry *ptr)
+{
+    hardirq_key_t key = {};
+    hardirq_val_t *valp, val = {};
+
+    key.irq = ptr->irq;
+    valp = bpf_map_lookup_elem(&tbl_hardirq, &key);
+    if (valp) {
+        // known IRQ: only the timestamp changes, so write it in place rather
+        // than copying the whole value back onto itself through the map.
+        valp->ts = bpf_ktime_get_ns();
+        return 0;
+    }
+
+    // the read copies a fixed number of bytes, so a name that fills the whole
+    // buffer would be left unterminated; always end it with a NUL.
+    TP_DATA_LOC_READ_CONST(val.name, ptr, ptr->data_loc_name, NETDATA_HARDIRQ_NAME_LEN);
+    val.name[NETDATA_HARDIRQ_NAME_LEN - 1] = '\0';
+
+    val.ts = bpf_ktime_get_ns();
+    bpf_map_update_elem(&tbl_hardirq, &key, &val, BPF_ANY);
+
+    return 0;
+}
+
+// IRQ handler exit: add the time elapsed since the matching entry, in
+// microseconds, to the IRQ's running latency total.
+SEC("tracepoint/irq/irq_handler_exit")
+int netdata_irq_handler_exit(struct netdata_irq_handler_exit *ptr)
+{
+    hardirq_key_t key = {};
+    hardirq_val_t *valp;
+
+    key.irq = ptr->irq;
+    valp = bpf_map_lookup_elem(&tbl_hardirq, &key);
+    // a zero timestamp means no entry was recorded in this value; diffing
+    // against it would add the whole clock reading as "latency".
+    if (!valp || !valp->ts) {
+        return 0;
+    }
+
+    // get time diff and convert to microseconds.
+    u64 latency = (bpf_ktime_get_ns() - valp->ts) / 1000;
+    libnetdata_update_u64(&valp->latency, latency);
+
+    return 0;
+}
+
+/************************************************************************************
+ * HARDIRQ STATIC
+ ***********************************************************************************/
+
+// generates the entry handler of one static IRQ type: it stamps the type's
+// slot with the current time. the slot is written in place; the lookup can
+// only miss for an index outside the array, in which case there is nothing
+// to update.
+#define HARDIRQ_STATIC_GEN_ENTRY(__type, __enum_idx) \
+int netdata_irq_ ##__type(struct netdata_irq_vectors_entry *ptr) \
+{ \
+    u32 idx = (__enum_idx); \
+    hardirq_static_val_t *valp; \
+    \
+    valp = bpf_map_lookup_elem(&tbl_hardirq_static, &idx); \
+    if (!valp) { \
+        return 0; \
+    } \
+    \
+    valp->ts = bpf_ktime_get_ns(); \
+    \
+    return 0; \
+}
+
+// generates the exit handler of one static IRQ type: it adds the time
+// elapsed since the matching entry, in microseconds, to the slot's total.
+#define HARDIRQ_STATIC_GEN_EXIT(__type, __enum_idx) \
+int netdata_irq_ ##__type(struct netdata_irq_vectors_exit *ptr) \
+{ \
+    u32 idx = (__enum_idx); \
+    hardirq_static_val_t *valp; \
+    \
+    valp = bpf_map_lookup_elem(&tbl_hardirq_static, &idx); \
+    /* array slots start zeroed, so a zero timestamp means no entry */ \
+    /* has been recorded yet; skip instead of adding a bogus latency. */ \
+    if (!valp || !valp->ts) { \
+        return 0; \
+    } \
+    \
+    /* get time diff and convert to microseconds. */ \
+    u64 latency = (bpf_ktime_get_ns() - valp->ts) / 1000; \
+    libnetdata_update_u64(&valp->latency, latency); \
+    \
+    return 0; \
+}
+
+SEC("tracepoint/irq_vectors/thermal_apic_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    thermal_apic_entry,
+    NETDATA_HARDIRQ_STATIC_APIC_THERMAL
+)
+SEC("tracepoint/irq_vectors/thermal_apic_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    thermal_apic_exit,
+    NETDATA_HARDIRQ_STATIC_APIC_THERMAL
+)
+
+SEC("tracepoint/irq_vectors/threshold_apic_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    threshold_apic_entry,
+    NETDATA_HARDIRQ_STATIC_APIC_THRESHOLD
+)
+SEC("tracepoint/irq_vectors/threshold_apic_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    threshold_apic_exit,
+    NETDATA_HARDIRQ_STATIC_APIC_THRESHOLD
+)
+
+SEC("tracepoint/irq_vectors/error_apic_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    error_apic_entry,
+    NETDATA_HARDIRQ_STATIC_APIC_ERROR
+)
+SEC("tracepoint/irq_vectors/error_apic_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    error_apic_exit,
+    NETDATA_HARDIRQ_STATIC_APIC_ERROR
+)
+
+SEC("tracepoint/irq_vectors/deferred_error_apic_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    deferred_error_apic_entry,
+    NETDATA_HARDIRQ_STATIC_APIC_DEFERRED_ERROR
+)
+SEC("tracepoint/irq_vectors/deferred_error_apic_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    deferred_error_apic_exit,
+    NETDATA_HARDIRQ_STATIC_APIC_DEFERRED_ERROR
+)
+
+SEC("tracepoint/irq_vectors/spurious_apic_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    spurious_apic_entry,
+    NETDATA_HARDIRQ_STATIC_APIC_SPURIOUS
+)
+SEC("tracepoint/irq_vectors/spurious_apic_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    spurious_apic_exit,
+    NETDATA_HARDIRQ_STATIC_APIC_SPURIOUS
+)
+
+SEC("tracepoint/irq_vectors/call_function_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    call_function_entry,
+    NETDATA_HARDIRQ_STATIC_FUNC_CALL
+)
+SEC("tracepoint/irq_vectors/call_function_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    call_function_exit,
+    NETDATA_HARDIRQ_STATIC_FUNC_CALL
+)
+
+SEC("tracepoint/irq_vectors/call_function_single_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    call_function_single_entry,
+    NETDATA_HARDIRQ_STATIC_FUNC_CALL_SINGLE
+)
+SEC("tracepoint/irq_vectors/call_function_single_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    call_function_single_exit,
+    NETDATA_HARDIRQ_STATIC_FUNC_CALL_SINGLE
+)
+
+SEC("tracepoint/irq_vectors/reschedule_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    reschedule_entry,
+    NETDATA_HARDIRQ_STATIC_RESCHEDULE
+)
+SEC("tracepoint/irq_vectors/reschedule_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    reschedule_exit,
+    NETDATA_HARDIRQ_STATIC_RESCHEDULE
+)
+
+SEC("tracepoint/irq_vectors/local_timer_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    local_timer_entry,
+    NETDATA_HARDIRQ_STATIC_LOCAL_TIMER
+)
+SEC("tracepoint/irq_vectors/local_timer_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    local_timer_exit,
+    NETDATA_HARDIRQ_STATIC_LOCAL_TIMER
+)
+
+SEC("tracepoint/irq_vectors/irq_work_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    irq_work_entry,
+    NETDATA_HARDIRQ_STATIC_IRQ_WORK
+)
+SEC("tracepoint/irq_vectors/irq_work_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    irq_work_exit,
+    NETDATA_HARDIRQ_STATIC_IRQ_WORK
+)
+
+SEC("tracepoint/irq_vectors/x86_platform_ipi_entry")
+HARDIRQ_STATIC_GEN_ENTRY(
+    x86_platform_ipi_entry,
+    NETDATA_HARDIRQ_STATIC_X86_PLATFORM_IPI
+)
+SEC("tracepoint/irq_vectors/x86_platform_ipi_exit")
+HARDIRQ_STATIC_GEN_EXIT(
+    x86_platform_ipi_exit,
+    NETDATA_HARDIRQ_STATIC_X86_PLATFORM_IPI
+)
+
+char _license[] SEC("license") = "GPL";