From 9447da55da69dd5b8e3773adc96b2cdea1ca7a2e Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Mon, 9 Jan 2023 22:04:59 +0100 Subject: [PATCH 1/6] fix: correctly free the state in modern bpf probe Signed-off-by: Andrea Terzolo --- userspace/libpman/src/capture.c | 7 ++++++- userspace/libpman/src/lifecycle.c | 9 +++++---- userspace/libpman/src/state.c | 13 +++++++++---- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/userspace/libpman/src/capture.c b/userspace/libpman/src/capture.c index c89fec7820..c4bd3ece98 100644 --- a/userspace/libpman/src/capture.c +++ b/userspace/libpman/src/capture.c @@ -40,7 +40,12 @@ int pman_enable_capture(bool *tp_set) int pman_disable_capture() { - return pman_detach_all_programs(); + /* If we fail at initialization time the BPF skeleton is not initialized */ + if(g_state.skel) + { + return pman_detach_all_programs(); + } + return 0; } #ifdef TEST_HELPERS diff --git a/userspace/libpman/src/lifecycle.c b/userspace/libpman/src/lifecycle.c index 1bc35becfa..0025cc24ab 100644 --- a/userspace/libpman/src/lifecycle.c +++ b/userspace/libpman/src/lifecycle.c @@ -40,22 +40,23 @@ int pman_load_probe() void pman_close_probe() { - if(!g_state.cons_pos) + if(g_state.cons_pos) { free(g_state.cons_pos); } - if(!g_state.prod_pos) + if(g_state.prod_pos) { free(g_state.prod_pos); } - if(!g_state.skel) + if(g_state.skel) { + bpf_probe__detach(g_state.skel); bpf_probe__destroy(g_state.skel); } - if(!g_state.rb_manager) + if(g_state.rb_manager) { ring_buffer__free(g_state.rb_manager); } diff --git a/userspace/libpman/src/state.c b/userspace/libpman/src/state.c index bc338fc887..3f317a3354 100644 --- a/userspace/libpman/src/state.c +++ b/userspace/libpman/src/state.c @@ -22,16 +22,21 @@ limitations under the License. #include #include "state.h" -struct internal_state g_state; +struct internal_state g_state = {}; void pman_print_error(const char* error_message) { if(!error_message) { - fprintf(stderr, "libpman: No specific message available (errno: %d | message: %s)\n", errno, strerror(errno)); + return; } - else + + if(errno != 0) { fprintf(stderr, "libpman: %s (errno: %d | message: %s)\n", error_message, errno, strerror(errno)); } -} \ No newline at end of file + else + { + fprintf(stderr, "libpman: %s\n", error_message); + } +} From 7bcd0f9d9b95c36a55e3a33051cc66009df24f1a Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 10:22:57 +0100 Subject: [PATCH 2/6] new: support multiple CPUs per buffer Signed-off-by: Andrea Terzolo --- driver/modern_bpf/maps/maps.h | 11 +- userspace/libpman/include/libpman.h | 29 ++- userspace/libpman/src/capture.c | 12 +- userspace/libpman/src/configuration.c | 81 ++++++- userspace/libpman/src/maps.c | 6 +- userspace/libpman/src/ringbuffer.c | 210 ++++++++++-------- userspace/libpman/src/state.h | 10 +- .../engine/modern_bpf/modern_bpf_public.h | 5 +- .../engine/modern_bpf/scap_modern_bpf.c | 24 +- .../engine/modern_bpf/scap_modern_bpf.h | 7 +- 10 files changed, 260 insertions(+), 135 deletions(-) diff --git a/driver/modern_bpf/maps/maps.h b/driver/modern_bpf/maps/maps.h index 4b6352e417..5f8ac193e2 100644 --- a/driver/modern_bpf/maps/maps.h +++ b/driver/modern_bpf/maps/maps.h @@ -139,7 +139,7 @@ struct /** * @brief For every CPU on the system we have a counter - * map where we store the number of events correcty pushed + * map where we store the number of events correctly pushed * and the number of events dropped. 
*/ struct @@ -154,9 +154,7 @@ struct /*=============================== RINGBUF MAP ===============================*/ /** - * @brief We will have a ringbuf map for every CPU on the system. - * The dimension of the single ringbuf and the number of - * ringbuf maps are set in userspace. + * @brief We use this map to let the verifier understand the content of our array of maps (`ringbuf_maps`) */ struct ringbuf_map { @@ -164,8 +162,9 @@ struct ringbuf_map }; /** - * @brief This array of maps will contain a ringbuf map for every CPU - * on the system. + * @brief This array of maps will contain a variable number of ring buffers + * according to the user-provided configuration. It could also contain only + * one buffer shared between all CPUs. */ struct { diff --git a/userspace/libpman/include/libpman.h b/userspace/libpman/include/libpman.h index 36d94dc6c8..0351bced55 100644 --- a/userspace/libpman/include/libpman.h +++ b/userspace/libpman/include/libpman.h @@ -26,7 +26,7 @@ extern "C" #endif /* `libpman` return values convention: - * In case of success `0` is return otherwise `errno`. If `errno` is not + * In case of success `0` is returned otherwise `errno`. If `errno` is not * available `-1` is returned. * * Please Note: @@ -47,17 +47,24 @@ extern "C" * * @param verbosity use `true` if you want to activate libbpf verbosity. * @param buf_bytes_dim dimension of a single per-CPU buffer in bytes. + * @param cpus_for_each_buffer number of CPUs to which we want to associate a ring buffer. + * @param allocate_online_only if true, allocate ring buffers taking only into account online CPUs. * @return `0` on success, `-1` in case of error. */ - int pman_init_state(bool verbosity, unsigned long buf_bytes_dim); + int pman_init_state(bool verbosity, unsigned long buf_bytes_dim, uint16_t cpus_for_each_buffer, bool allocate_online_only); /** - * @brief Return the number of available CPUs on the system, not the - * online CPUs! + * @brief Clear the `libpman` global state before it is used. + * This API could be useful if we open the modern bpf engine multiple times. + */ + void pman_clear_state(void); + + /** + * @brief Return the number of allocated ring buffers. * - * @return number of available CPUs on success, `-1` in case of error. + * @return number of allocated ring buffers. */ - int pman_get_cpus_number(void); + int pman_get_required_buffers(void); ///////////////////////////// // PROBE LIFECYCLE @@ -225,10 +232,10 @@ extern "C" * * @param event_ptr in case of success return a pointer * to the event, otherwise return NULL. - * @param cpu_id in case of success returns the id of the CPU - * on which we have found the event, otherwise return `-1`. + * @param buffer_id in case of success returns the id of the ring buffer + * from which we retrieved the event, otherwise return `-1`. */ - void pman_consume_first_from_buffers(void** event_ptr, int16_t *cpu_id); + void pman_consume_first_event(void** event_ptr, int16_t* buffer_id); ///////////////////////////// // CAPTURE (EXCHANGE VALUES WITH BPF SIDE) @@ -413,7 +420,7 @@ extern "C" * @brief Return `true` if all ring buffers are full. 
To state * that a ring buffer is full we check that the free space is less * than the `threshold` - * + * * @param threshold used to check if a buffer is full * @return `true` if all buffers are full, otherwise `false` */ @@ -421,7 +428,7 @@ extern "C" /** * @brief Get the producer pos for the required ring - * + * * @param ring_num ring for which we want to obtain the producer pos * @return producer pos as an unsigned long */ diff --git a/userspace/libpman/src/capture.c b/userspace/libpman/src/capture.c index c4bd3ece98..92d06d485a 100644 --- a/userspace/libpman/src/capture.c +++ b/userspace/libpman/src/capture.c @@ -66,7 +66,7 @@ int pman_print_stats() return errno; } - for(int index = 0; index < g_state.n_cpus; index++) + for(int index = 0; index < g_state.n_possible_cpus; index++) { if(bpf_map_lookup_elem(counter_maps_fd, &index, &cnt_map) < 0) { @@ -114,7 +114,10 @@ int pman_get_scap_stats(void *scap_stats_struct) * - stats->n_preemptions */ - for(int index = 0; index < g_state.n_cpus; index++) + /* We always take statistics from all the CPUs, even if some of them are not online. + * If the CPU is not online the counter map will be empty. + */ + for(int index = 0; index < g_state.n_possible_cpus; index++) { if(bpf_map_lookup_elem(counter_maps_fd, &index, &cnt_map) < 0) { @@ -146,7 +149,10 @@ int pman_get_n_tracepoint_hit(long *n_events_per_cpu) return errno; } - for(int index = 0; index < g_state.n_cpus; index++) + /* We always take statistics from all the CPUs, even if some of them are not online. + * If the CPU is not online the counter map will be empty. + */ + for(int index = 0; index < g_state.n_possible_cpus; index++) { if(bpf_map_lookup_elem(counter_maps_fd, &index, &cnt_map) < 0) { diff --git a/userspace/libpman/src/configuration.c b/userspace/libpman/src/configuration.c index 86829dacac..d4e649bbd8 100644 --- a/userspace/libpman/src/configuration.c +++ b/userspace/libpman/src/configuration.c @@ -44,8 +44,27 @@ static void setup_libbpf_logging(bool verbosity) } } -int pman_init_state(bool verbosity, unsigned long buf_bytes_dim) +void pman_clear_state() { + g_state.skel = NULL; + g_state.rb_manager = NULL; + g_state.n_possible_cpus = 0; + g_state.n_interesting_cpus = 0; + g_state.allocate_online_only = false; + g_state.n_required_buffers = 0; + g_state.cpus_for_each_buffer = 0; + g_state.ringbuf_pos = 0; + g_state.cons_pos = NULL; + g_state.prod_pos = NULL; + g_state.inner_ringbuf_map_fd = 0; + g_state.buffer_bytes_dim = 0; + g_state.last_ring_read = -1; + g_state.last_event_size = 0; +} + +int pman_init_state(bool verbosity, unsigned long buf_bytes_dim, uint16_t cpus_for_each_buffer, bool allocate_online_only) +{ + char error_message[MAX_ERROR_MESSAGE_LEN]; /* `LIBBPF_STRICT_ALL` turns on all supported strict features * of libbpf to simulate libbpf v1.0 behavior. @@ -57,14 +76,64 @@ int pman_init_state(bool verbosity, unsigned long buf_bytes_dim) setup_libbpf_logging(verbosity); /* Set the available number of CPUs inside the internal state. */ - g_state.n_cpus = libbpf_num_possible_cpus(); - if(g_state.n_cpus <= 0) + g_state.n_possible_cpus = libbpf_num_possible_cpus(); + if(g_state.n_possible_cpus <= 0) { pman_print_error("no available cpus"); return -1; } - /* Set the dimension of a single per-CPU ring buffer. 
*/ + g_state.allocate_online_only = allocate_online_only; + + if(g_state.allocate_online_only) + { + ssize_t online_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if(online_cpus != -1) + { + /* We will allocate buffers only for online CPUs */ + g_state.n_interesting_cpus = online_cpus; + } + else + { + /* Fallback to all available CPU even if the `allocate_online_only` flag is set to `true` */ + g_state.n_interesting_cpus = g_state.n_possible_cpus; + } + } + else + { + /* We will allocate buffers only for all available CPUs */ + g_state.n_interesting_cpus = g_state.n_possible_cpus; + } + + /* We are requiring a buffer every `cpus_for_each_buffer` CPUs, + * but `cpus_for_each_buffer` is greater than our possible CPU number! + */ + if(cpus_for_each_buffer > g_state.n_interesting_cpus) + { + snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "we are requiring a buffer every '%d' CPUs, but '%d' is greater than our interesting CPU number (%d)!", cpus_for_each_buffer, cpus_for_each_buffer, g_state.n_interesting_cpus); + pman_print_error((const char*)error_message); + return -1; + } + + /* `0` is a special value that means a single ring buffer shared between all the CPUs */ + if(cpus_for_each_buffer == 0) + { + /* We want a single ring buffer so 1 ring buffer for all the interesting CPUs we have */ + g_state.cpus_for_each_buffer = g_state.n_interesting_cpus; + } + else + { + g_state.cpus_for_each_buffer = cpus_for_each_buffer; + } + + /* Set the number of ring buffers we need */ + g_state.n_required_buffers = g_state.n_interesting_cpus / g_state.cpus_for_each_buffer; + /* If we have some remaining CPUs it means that we need another buffer */ + if((g_state.n_interesting_cpus % g_state.cpus_for_each_buffer) != 0) + { + g_state.n_required_buffers++; + } + /* Set the dimension of a single ring buffer */ g_state.buffer_bytes_dim = buf_bytes_dim; /* These will be used during the ring buffer consumption phase. */ @@ -73,7 +142,7 @@ int pman_init_state(bool verbosity, unsigned long buf_bytes_dim) return 0; } -int pman_get_cpus_number() +int pman_get_required_buffers() { - return g_state.n_cpus; + return g_state.n_required_buffers; } diff --git a/userspace/libpman/src/maps.c b/userspace/libpman/src/maps.c index b88d8d09bc..ad558916a4 100644 --- a/userspace/libpman/src/maps.c +++ b/userspace/libpman/src/maps.c @@ -244,7 +244,8 @@ int pman_fill_extra_event_prog_tail_table() static int size_auxiliary_maps() { - if(bpf_map__set_max_entries(g_state.skel->maps.auxiliary_maps, g_state.n_cpus)) + /* We always allocate auxiliary maps from all the CPUs, even if some of them are not online. */ + if(bpf_map__set_max_entries(g_state.skel->maps.auxiliary_maps, g_state.n_possible_cpus)) { pman_print_error("unable to set max entries for 'auxiliary_maps'"); return errno; @@ -254,7 +255,8 @@ static int size_auxiliary_maps() static int size_counter_maps() { - if(bpf_map__set_max_entries(g_state.skel->maps.counter_maps, g_state.n_cpus)) + /* We always allocate counter maps from all the CPUs, even if some of them are not online. */ + if(bpf_map__set_max_entries(g_state.skel->maps.counter_maps, g_state.n_possible_cpus)) { pman_print_error(" unable to set max entries for 'counter_maps'"); return errno; diff --git a/userspace/libpman/src/ringbuffer.c b/userspace/libpman/src/ringbuffer.c index 1cdd3e4b98..3870c6304b 100644 --- a/userspace/libpman/src/ringbuffer.c +++ b/userspace/libpman/src/ringbuffer.c @@ -26,6 +26,8 @@ limitations under the License. 
#include "ringbuffer_definitions.h" +/* Utility functions object loading */ + /* This must be done to please the verifier! At load-time, the verifier must know the * size of a map inside the array. */ @@ -55,7 +57,11 @@ static int ringbuf_array_set_inner_map() static int ringbuf_array_set_max_entries() { - if(bpf_map__set_max_entries(g_state.skel->maps.ringbuf_maps, g_state.n_cpus)) + /* We always allocate a number of entries equal to the available CPUs. + * This doesn't mean that we allocate a ring buffer for every available CPU, + * it means only that every CPU will have an associated entry. + */ + if(bpf_map__set_max_entries(g_state.skel->maps.ringbuf_maps, g_state.n_possible_cpus)) { pman_print_error("unable to set max entries for the ringbuf_array"); return errno; @@ -66,8 +72,8 @@ static int ringbuf_array_set_max_entries() static int allocate_consumer_producer_positions() { g_state.ringbuf_pos = 0; - g_state.cons_pos = (unsigned long *)calloc(g_state.n_cpus, sizeof(unsigned long)); - g_state.prod_pos = (unsigned long *)calloc(g_state.n_cpus, sizeof(unsigned long)); + g_state.cons_pos = (unsigned long *)calloc(g_state.n_required_buffers, sizeof(unsigned long)); + g_state.prod_pos = (unsigned long *)calloc(g_state.n_required_buffers, sizeof(unsigned long)); if(g_state.cons_pos == NULL || g_state.prod_pos == NULL) { pman_print_error("failed to alloc memory for cons_pos and prod_pos"); @@ -76,6 +82,7 @@ static int allocate_consumer_producer_positions() return 0; } +/* Before loading */ int pman_prepare_ringbuf_array_before_loading() { int err; @@ -86,122 +93,145 @@ int pman_prepare_ringbuf_array_before_loading() return err; } -static int create_first_ringbuffer_map() +static bool is_cpu_online(uint16_t cpu_id) { - int ringubuf_array_fd = -1; - int ringbuf_map_fd = -1; - int index = 0; - - /* We don't need anymore the inner map, close it. */ - close(g_state.inner_ringbuf_map_fd); - - /* `ringbuf_array` is a maps array, every map inside it is a `BPF_MAP_TYPE_RINGBUF`. */ - ringubuf_array_fd = bpf_map__fd(g_state.skel->maps.ringbuf_maps); - if(ringubuf_array_fd <= 0) + /* CPU 0 is always online */ + if(cpu_id == 0) { - pman_print_error("failed to get the ringubuf_array"); - return errno; + return true; } - /* create the first ringbuf map. */ - ringbuf_map_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, g_state.buffer_bytes_dim, NULL); - if(ringbuf_map_fd <= 0) + char filename[FILENAME_MAX]; + int online = 0; + snprintf(filename, sizeof(filename), "/sys/devices/system/cpu/cpu%d/online", cpu_id); + FILE *fp = fopen(filename, "r"); + if(fp == NULL) { - pman_print_error("failed to create the first ringbuf map"); - goto clean_create_first_ringbuffer_map; + /* When missing NUMA properties, CPUs do not expose online information. + * Fallback at considering them online if we can at least reach their folder. + * This is useful for example for raspPi devices. + * See: https://github.com/kubernetes/kubernetes/issues/95039 + */ + snprintf(filename, sizeof(filename), "/sys/devices/system/cpu/cpu%d/", cpu_id); + if(access(filename, F_OK) == 0) + { + return true; + } + else + { + return false; + } } - /* add the first ringbuf map into the array. 
*/ - if(bpf_map_update_elem(ringubuf_array_fd, &index, &ringbuf_map_fd, BPF_ANY)) + fscanf(fp, "%d", &online); + return online == 1; +} + +/* After loading */ +int pman_finalize_ringbuf_array_after_loading() +{ + int ringubuf_array_fd = -1; + char error_message[MAX_ERROR_MESSAGE_LEN]; + int *ringbufs_fds = (int *)calloc(g_state.n_required_buffers, sizeof(int)); + bool success = false; + + /* We don't need anymore the inner map, close it. */ + close(g_state.inner_ringbuf_map_fd); + + /* Create ring buffer maps. */ + for(int i = 0; i < g_state.n_required_buffers; i++) { - pman_print_error("failed to add the first ringbuf map into the array"); - goto clean_create_first_ringbuffer_map; + ringbufs_fds[i] = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, g_state.buffer_bytes_dim, NULL); + if(ringbufs_fds[i] <= 0) + { + snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to create the ringbuf map for CPU '%d'. (If you get memory allocation errors try to reduce the buffer dimension)", i); + pman_print_error((const char *)error_message); + goto clean_percpu_ring_buffers; + } } - g_state.rb_manager = ring_buffer__new(ringbuf_map_fd, NULL, NULL, NULL); + /* Create the ringbuf manager */ + g_state.rb_manager = ring_buffer__new(ringbufs_fds[0], NULL, NULL, NULL); if(!g_state.rb_manager) { - pman_print_error("failed to instantiate the ringbuf manager. (If you get memory allocation errors try to reduce the buffer dimension)"); - goto clean_create_first_ringbuffer_map; + pman_print_error("failed to instantiate the ringbuf manager."); + goto clean_percpu_ring_buffers; } - return 0; - -clean_create_first_ringbuffer_map: - close(ringbuf_map_fd); - close(ringubuf_array_fd); - return errno; -} - -static int create_remaining_ringbuffer_maps() -{ - int ringubuf_array_fd = -1; - int ringbuf_map_fd = -1; - char error_message[MAX_ERROR_MESSAGE_LEN]; - /* the first ringbuf map is already inserted into the array. - * See `create_first_ringbuffer_map()` function. + /* Add all remaining buffers into the manager. + * We start from 1 because the first one is + * used to instantiate the manager. */ - int index = 1; + for(int i = 1; i < g_state.n_required_buffers; i++) + { + if(ring_buffer__add(g_state.rb_manager, ringbufs_fds[i], NULL, NULL)) + { + snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to add the ringbuf map for CPU %d into the manager", i); + pman_print_error((const char *)error_message); + goto clean_percpu_ring_buffers; + } + } - /* get the ringbuf_array with a map already in it. */ + /* `ringbuf_array` is a maps array, every map inside it is a `BPF_MAP_TYPE_RINGBUF`. */ ringubuf_array_fd = bpf_map__fd(g_state.skel->maps.ringbuf_maps); if(ringubuf_array_fd <= 0) { - pman_print_error("failed to get a not empty ringubuf_array"); + pman_print_error("failed to get the ringubuf_array"); return errno; } - /* for all CPUs add the rinugbuf map into the array and add it also - * into the ringbuf manager. Please note: we have already initialized the - * the ringbuf_array and the manager with the map for the CPU `0`. 
- */ - for(index = 1; index < g_state.n_cpus; index++) + /* We need to associate every CPU to the right ring buffer */ + int ringbuf_id = 0; + int reached = 0; + for(int i = 0; i < g_state.n_possible_cpus; i++) { - ringbuf_map_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, g_state.buffer_bytes_dim, NULL); - if(ringbuf_map_fd <= 0) + /* If we want to allocate only buffers for online CPUs and the CPU is online, fill its + * ring buffer array entry, otherwise we can go on with the next online CPU + */ + if(g_state.allocate_online_only && !is_cpu_online(i)) { - snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to create the ringbuf map for CPU %d", index); - pman_print_error((const char *)error_message); - goto clean_create_remaining_ringbuffer_maps; + continue; } - if(bpf_map_update_elem(ringubuf_array_fd, &index, &ringbuf_map_fd, BPF_ANY)) + if(bpf_map_update_elem(ringubuf_array_fd, &i, &ringbufs_fds[ringbuf_id], BPF_ANY)) { - snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to add the ringbuf map for CPU %d into the array", index); + snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to add the ringbuf map for CPU '%d' to ringbuf '%d'", i, ringbuf_id); pman_print_error((const char *)error_message); - goto clean_create_remaining_ringbuffer_maps; + goto clean_percpu_ring_buffers; } - /* add the new ringbuf map into the manager. */ - if(ring_buffer__add(g_state.rb_manager, ringbuf_map_fd, NULL, NULL)) + if(++reached == g_state.cpus_for_each_buffer) { - snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to add the ringbuf map for CPU %d into the manager", index); - pman_print_error((const char *)error_message); - goto clean_create_remaining_ringbuffer_maps; + /* we need to switch to the next buffer */ + reached = 0; + ringbuf_id++; } } - return 0; + success = true; + +clean_percpu_ring_buffers: + for(int i = 0; i < g_state.n_required_buffers; i++) + { + if(ringbufs_fds[i]) + { + close(ringbufs_fds[i]); + } + } + free(ringbufs_fds); + + if(success) + { + return 0; + } -clean_create_remaining_ringbuffer_maps: - close(ringbuf_map_fd); close(ringubuf_array_fd); + if(g_state.rb_manager) + { + ring_buffer__free(g_state.rb_manager); + } return errno; } -/* Create all the ringbuffer maps inside the ringbuffer_array and assign - * them to the manager. Note, the first ringbuffer map is separated from - * the others because we first need to create the ringbuffer manager with - * just one map `ring_buffer__new`. After having instanciating the manager - * we can add to it all the other maps with `ring_buffer__add`. - */ -int pman_finalize_ringbuf_array_after_loading() -{ - int err; - err = create_first_ringbuffer_map(); - err = err ?: create_remaining_ringbuffer_maps(); - return err; -} - static inline void *ringbuf__get_first_ring_event(struct ring *r, int pos) { int *len_ptr = NULL; @@ -209,7 +239,7 @@ static inline void *ringbuf__get_first_ring_event(struct ring *r, int pos) void *sample = NULL; /* If the consumer reaches the producer update the producer position to - * get the newly collected events. + * get the newly collected events. 
*/ if(g_state.cons_pos[pos] >= g_state.prod_pos[pos]) { @@ -238,7 +268,7 @@ static inline void *ringbuf__get_first_ring_event(struct ring *r, int pos) return sample; } -static void ringbuf__consume_first_event(struct ring_buffer *rb, struct ppm_evt_hdr **event_ptr, int16_t *cpu_id) +static void ringbuf__consume_first_event(struct ring_buffer *rb, struct ppm_evt_hdr **event_ptr, int16_t *buffer_id) { uint64_t min_ts = 0xffffffffffffffffLL; struct ppm_evt_hdr *tmp_pointer = NULL; @@ -273,15 +303,15 @@ static void ringbuf__consume_first_event(struct ring_buffer *rb, struct ppm_evt_ } *event_ptr = tmp_pointer; - *cpu_id = tmp_ring; + *buffer_id = tmp_ring; g_state.last_ring_read = tmp_ring; g_state.last_event_size = tmp_cons_increment; } -/* This API must be used if we want to get the first event according to its timestamp */ -void pman_consume_first_from_buffers(void **event_ptr, int16_t *cpu_id) +/* Consume */ +void pman_consume_first_event(void **event_ptr, int16_t *buffer_id) { - ringbuf__consume_first_event(g_state.rb_manager, (struct ppm_evt_hdr **)event_ptr, cpu_id); + ringbuf__consume_first_event(g_state.rb_manager, (struct ppm_evt_hdr **)event_ptr, buffer_id); } #ifdef TEST_HELPERS @@ -292,7 +322,7 @@ void pman_consume_first_from_buffers(void **event_ptr, int16_t *cpu_id) */ static bool pman_is_ringbuffer_full(int ring_num, unsigned long threshold) { - if(ring_num < 0 || ring_num >= g_state.n_cpus) + if(ring_num < 0 || ring_num >= g_state.n_possible_cpus) { return -1; } @@ -317,7 +347,7 @@ bool pman_are_all_ringbuffers_full(unsigned long threshold) int attempt = 0; /* Performs 3 attempts just to be sure that all the buffers are empty. */ - while(pos < g_state.n_cpus) + while(pos < g_state.n_possible_cpus) { if(!pman_is_ringbuffer_full(pos, threshold)) { @@ -326,7 +356,7 @@ bool pman_are_all_ringbuffers_full(unsigned long threshold) pos++; - if(pos == g_state.n_cpus && attempt != 2) + if(pos == g_state.n_possible_cpus && attempt != 2) { printf("Stable, attempt %d\n", attempt); pos = 0; diff --git a/userspace/libpman/src/state.h b/userspace/libpman/src/state.h index f5f3612aaf..0dcae390ec 100644 --- a/userspace/libpman/src/state.h +++ b/userspace/libpman/src/state.h @@ -29,14 +29,18 @@ struct internal_state { struct bpf_probe* skel; /* bpf skeleton with all programs and maps. */ struct ring_buffer* rb_manager; /* ring_buffer manager with all per-CPU ringbufs. */ - int16_t n_cpus; /* number of system available CPUs. */ + int16_t n_possible_cpus; /* number of possible system CPUs (online and not). */ + int16_t n_interesting_cpus; /* according to userspace configuration we can consider only online CPUs or all available CPUs. */ + bool allocate_online_only; /* If true we allocate ring buffers only for online CPUs */ + uint32_t n_required_buffers; /* number of ring buffers we need to allocate */ + uint16_t cpus_for_each_buffer; /* Users want a ring buffer every `cpus_for_each_buffer` CPUs */ int ringbuf_pos; /* actual ringbuf we are considering. */ unsigned long* cons_pos; /* every ringbuf has a consumer position. */ unsigned long* prod_pos; /* every ringbuf has a producer position. */ int32_t inner_ringbuf_map_fd; /* inner map used to configure the ringbuf array before loading phase. */ unsigned long buffer_bytes_dim; /* dimension of a single per-CPU ringbuffer in bytes. */ - int last_ring_read; /* Last ring from which we have correctly read an event. Could be `-1` if there were no successful reads. */ - unsigned long last_event_size; /* Last event correctly read. 
Could be `0` if there were no successful reads. */ + int last_ring_read; /* Last ring from which we have correctly read an event. Could be `-1` if there were no successful reads. */ + unsigned long last_event_size; /* Last event correctly read. Could be `0` if there were no successful reads. */ }; extern struct internal_state g_state; diff --git a/userspace/libscap/engine/modern_bpf/modern_bpf_public.h b/userspace/libscap/engine/modern_bpf/modern_bpf_public.h index 2ddeeb0e11..fd35d43528 100644 --- a/userspace/libscap/engine/modern_bpf/modern_bpf_public.h +++ b/userspace/libscap/engine/modern_bpf/modern_bpf_public.h @@ -16,6 +16,7 @@ limitations under the License. #include #define MODERN_BPF_ENGINE "modern_bpf" +#define DEFAULT_CPU_FOR_EACH_BUFFER 1 #ifdef __cplusplus extern "C" @@ -24,7 +25,9 @@ extern "C" struct scap_modern_bpf_engine_params { - unsigned long buffer_bytes_dim; ///< Dimension of a single per-CPU buffer in bytes. Please note: this buffer will be mapped twice in the process virtual memory, so pay attention to its size. + uint16_t cpus_for_each_buffer; ///< [EXPERIMENTAL] We will allocate a ring buffer every `cpus_for_each_buffer` CPUs. `0` is a special value and means a single ring buffer shared between all the CPUs. + bool allocate_online_only; ///< [EXPERIMENTAL] Allocate ring buffers only for online CPUs. The number of ring buffers allocated changes according to the `cpus_for_each_buffer` param. Please note: this buffer will be mapped twice both kernel and userspace-side, so pay attention to its size. + unsigned long buffer_bytes_dim; ///< Dimension of a ring buffer in bytes. The number of ring buffers allocated changes according to the `cpus_for_each_buffer` param. Please note: this buffer will be mapped twice both kernel and userspace-side, so pay attention to its size. }; #ifdef __cplusplus diff --git a/userspace/libscap/engine/modern_bpf/scap_modern_bpf.c b/userspace/libscap/engine/modern_bpf/scap_modern_bpf.c index f5de7eb63d..9f7ab6d93f 100644 --- a/userspace/libscap/engine/modern_bpf/scap_modern_bpf.c +++ b/userspace/libscap/engine/modern_bpf/scap_modern_bpf.c @@ -128,9 +128,13 @@ static void scap_modern_bpf__free_engine(struct scap_engine_handle engine) free(engine.m_handle); } -static int32_t scap_modern_bpf__next(struct scap_engine_handle engine, OUT scap_evt** pevent, OUT uint16_t* pcpuid) +/* The third parameter is not the CPU number from which we extract the event but the ring buffer number. + * For the old BPF probe and the kernel module the number of CPUs is equal to the number of buffers since we always use a per-CPU approach. + */ +static int32_t scap_modern_bpf__next(struct scap_engine_handle engine, OUT scap_evt** pevent, OUT uint16_t* buffer_id) { - pman_consume_first_from_buffers((void**)pevent, pcpuid); + pman_consume_first_event((void**)pevent, buffer_id); + if((*pevent) == NULL) { /* The first time we sleep 500 us, if we have consecutive timeouts we can reach also 30 ms. */ @@ -210,10 +214,12 @@ int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs) struct scap_modern_bpf_engine_params* params = oargs->engine_params; bool libbpf_verbosity = false; + pman_clear_state(); + /* Some checks to test if we can use the modern BPF probe * - check the ring-buffer dimension in bytes. * - check the minimum required kernel version. - * + * * Please note the presence of BTF is directly checked by `libbpf` see `bpf_object__load_vmlinux_btf` method. 
*/ if(check_buffer_bytes_dim(handle->m_lasterr, params->buffer_bytes_dim) != SCAP_SUCCESS) @@ -226,8 +232,11 @@ int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs) return SCAP_FAILURE; } - /* Initialize the libpman internal state */ - if(pman_init_state(libbpf_verbosity, params->buffer_bytes_dim)) + /* Initialize the libpman internal state. + * Validation of `cpus_for_each_buffer` is made inside libpman + * since this is the unique place where we have the number of CPUs + */ + if(pman_init_state(libbpf_verbosity, params->buffer_bytes_dim, params->cpus_for_each_buffer, params->allocate_online_only)) { snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "unable to configure the libpman state."); return SCAP_FAILURE; @@ -236,9 +245,6 @@ int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs) /* Set an initial sleep time in case of timeouts. */ engine.m_handle->m_retry_us = BUFFER_EMPTY_WAIT_TIME_US_START; - /* Return the number of system available CPUs, not online CPUs. */ - engine.m_handle->m_num_cpus = pman_get_cpus_number(); - /* Load and attach */ ret = pman_open_probe(); ret = ret ?: pman_prepare_ringbuf_array_before_loading(); @@ -278,7 +284,7 @@ int32_t scap_modern_bpf__close(struct scap_engine_handle engine) static uint32_t scap_modern_bpf__get_n_devs(struct scap_engine_handle engine) { - return engine.m_handle->m_num_cpus; + return pman_get_required_buffers(); } int32_t scap_modern_bpf__get_stats(struct scap_engine_handle engine, OUT scap_stats* stats) diff --git a/userspace/libscap/engine/modern_bpf/scap_modern_bpf.h b/userspace/libscap/engine/modern_bpf/scap_modern_bpf.h index 3cd9cc2b7a..458aec5002 100644 --- a/userspace/libscap/engine/modern_bpf/scap_modern_bpf.h +++ b/userspace/libscap/engine/modern_bpf/scap_modern_bpf.h @@ -26,8 +26,7 @@ struct scap; struct modern_bpf_engine { - size_t m_num_cpus; - unsigned long m_retry_us; - char* m_lasterr; - interesting_tp_set open_tp_set; + unsigned long m_retry_us; /* Microseconds to wait if all ring buffers are empty */ + char* m_lasterr; /* Last error caught by the engine */ + interesting_tp_set open_tp_set; /* Interesting tracepoints */ }; From e50d37cb15cd40068624d8d8e18c010867992f17 Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 10:38:04 +0100 Subject: [PATCH 3/6] update: propagate support to scap-open Signed-off-by: Andrea Terzolo --- .../libscap/examples/01-open/scap_open.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/userspace/libscap/examples/01-open/scap_open.c b/userspace/libscap/examples/01-open/scap_open.c index f168cfdc22..6cfa998916 100644 --- a/userspace/libscap/examples/01-open/scap_open.c +++ b/userspace/libscap/examples/01-open/scap_open.c @@ -39,6 +39,8 @@ limitations under the License. #define EVENT_TYPE_OPTION "--evt_type" #define BUFFER_OPTION "--buffer_dim" #define SIMPLE_SET_OPTION "--simple_set" +#define CPUS_FOR_EACH_BUFFER_MODE "--cpus_for_buf" +#define ALLOCATE_ONLINE_ONLY_MODE "--online_only" /* PRINT */ #define VALIDATION_OPTION "--validate_syscalls" @@ -709,6 +711,7 @@ void print_help() printf("'%s ': number of events to catch before terminating. (default: UINT64_MAX)\n", NUM_EVENTS_OPTION); printf("'%s ': every event of this type will be printed to console. 
(default: -1, no print)\n", EVENT_TYPE_OPTION); printf("'%s ': dimension in bytes of a single per CPU buffer.\n", BUFFER_OPTION); + printf("'%s ': allocate a ring buffer for every `cpus_for_each_buffer` CPUs.\n", CPUS_FOR_EACH_BUFFER_MODE); printf("\n------> VALIDATION OPTIONS\n"); printf("'%s': validation checks.\n", VALIDATION_OPTION); printf("\n------> PRINT OPTIONS\n"); @@ -731,7 +734,8 @@ void print_scap_source() } else if(strcmp(oargs.engine_name, MODERN_BPF_ENGINE) == 0) { - printf("* Modern BPF probe.\n"); + struct scap_modern_bpf_engine_params* params = oargs.engine_params; + printf("* Modern BPF probe, 1 ring buffer every %d CPUs\n", params->cpus_for_each_buffer); } else if(strcmp(oargs.engine_name, SAVEFILE_ENGINE) == 0) { @@ -815,6 +819,8 @@ void parse_CLI_options(int argc, char** argv) oargs.engine_name = MODERN_BPF_ENGINE; oargs.mode = SCAP_MODE_LIVE; modern_bpf_params.buffer_bytes_dim = buffer_bytes_dim; + modern_bpf_params.cpus_for_each_buffer = DEFAULT_CPU_FOR_EACH_BUFFER; + modern_bpf_params.allocate_online_only = false; oargs.engine_params = &modern_bpf_params; } if(!strcmp(argv[i], SCAP_FILE_OPTION)) @@ -888,6 +894,22 @@ void parse_CLI_options(int argc, char** argv) { enable_simple_set(); } + /* This should be used only with the modern probe */ + if(!strcmp(argv[i], CPUS_FOR_EACH_BUFFER_MODE)) + { + if(!(i + 1 < argc)) + { + printf("\nYou need to specify also the number of CPUs. Bye!\n"); + exit(EXIT_FAILURE); + } + modern_bpf_params.cpus_for_each_buffer = atoi(argv[++i]); + } + /* This should be used only with the modern probe */ + if(!strcmp(argv[i], ALLOCATE_ONLINE_ONLY_MODE)) + { + modern_bpf_params.allocate_online_only = true; + } + /*=============================== CONFIGURATIONS ===========================*/ From 43b33a9c1eaa3b12c9462bbe666e2ae24a671e2f Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 10:38:43 +0100 Subject: [PATCH 4/6] update: propagate support to sinsp Signed-off-by: Andrea Terzolo --- userspace/libsinsp/examples/test.cpp | 2 +- userspace/libsinsp/sinsp.cpp | 4 +++- userspace/libsinsp/sinsp.h | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/userspace/libsinsp/examples/test.cpp b/userspace/libsinsp/examples/test.cpp index 92ef660d0c..15057f86e2 100644 --- a/userspace/libsinsp/examples/test.cpp +++ b/userspace/libsinsp/examples/test.cpp @@ -194,7 +194,7 @@ void open_engine(sinsp& inspector) } else if(!engine_string.compare(MODERN_BPF_ENGINE)) { - inspector.open_modern_bpf(buffer_bytes_dim, ppm_sc, tp_set); + inspector.open_modern_bpf(buffer_bytes_dim, DEFAULT_CPU_FOR_EACH_BUFFER, true, ppm_sc, tp_set); } else { diff --git a/userspace/libsinsp/sinsp.cpp b/userspace/libsinsp/sinsp.cpp index 70986307f0..1532c668f3 100644 --- a/userspace/libsinsp/sinsp.cpp +++ b/userspace/libsinsp/sinsp.cpp @@ -606,7 +606,7 @@ void sinsp::open_gvisor(const std::string& config_path, const std::string& root_ set_get_procs_cpu_from_driver(false); } -void sinsp::open_modern_bpf(unsigned long driver_buffer_bytes_dim, const std::unordered_set &ppm_sc_of_interest, const std::unordered_set &tp_of_interest) +void sinsp::open_modern_bpf(unsigned long driver_buffer_bytes_dim, uint16_t cpus_for_each_buffer, bool online_only, const std::unordered_set &ppm_sc_of_interest, const std::unordered_set &tp_of_interest) { scap_open_args oargs = factory_open_args(MODERN_BPF_ENGINE, SCAP_MODE_LIVE); @@ -617,6 +617,8 @@ void sinsp::open_modern_bpf(unsigned long driver_buffer_bytes_dim, const std::un /* Engine-specific args. 
*/ struct scap_modern_bpf_engine_params params; params.buffer_bytes_dim = driver_buffer_bytes_dim; + params.cpus_for_each_buffer = cpus_for_each_buffer; + params.allocate_online_only = online_only; oargs.engine_params = ¶ms; open_common(&oargs); } diff --git a/userspace/libsinsp/sinsp.h b/userspace/libsinsp/sinsp.h index 0fb5619b5a..83c7e02d67 100644 --- a/userspace/libsinsp/sinsp.h +++ b/userspace/libsinsp/sinsp.h @@ -223,7 +223,11 @@ class SINSP_PUBLIC sinsp : public capture_stats_source virtual void open_savefile(const std::string &filename, int fd = 0); virtual void open_plugin(const std::string &plugin_name, const std::string &plugin_open_params); virtual void open_gvisor(const std::string &config_path, const std::string &root_path); - virtual void open_modern_bpf(unsigned long driver_buffer_bytes_dim = DEFAULT_DRIVER_BUFFER_BYTES_DIM, const std::unordered_set &ppm_sc_of_interest = {}, const std::unordered_set &tp_of_interest = {}); + /*[EXPERIMENTAL] This API could change between releases, we are trying to find the right configuration to deploy the modern bpf probe: + * `cpus_for_each_buffer` and `online_only` are the 2 experimental params. The first one allows associating more than one CPU to a single ring buffer. + * The last one allows allocating ring buffers only for online CPUs and not for all system-available CPUs. + */ + virtual void open_modern_bpf(unsigned long driver_buffer_bytes_dim = DEFAULT_DRIVER_BUFFER_BYTES_DIM, uint16_t cpus_for_each_buffer = DEFAULT_CPU_FOR_EACH_BUFFER, bool online_only = true, const std::unordered_set &ppm_sc_of_interest = {}, const std::unordered_set &tp_of_interest = {}); virtual void open_test_input(scap_test_input_data *data); scap_open_args factory_open_args(const char* engine_name, scap_mode_t scap_mode); From e7d5ef9ccf556dbb9a5ecd9883a5f4d8bbe89a22 Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 11:17:53 +0100 Subject: [PATCH 5/6] tests: add new test suite for the modern probe Signed-off-by: Andrea Terzolo --- userspace/libscap/test/CMakeLists.txt | 6 + userspace/libscap/test/README.md | 20 + .../engines/modern_bpf/modern_bpf.cpp | 386 ++++++++++++++++++ 3 files changed, 412 insertions(+) create mode 100644 userspace/libscap/test/README.md create mode 100644 userspace/libscap/test/test_suites/engines/modern_bpf/modern_bpf.cpp diff --git a/userspace/libscap/test/CMakeLists.txt b/userspace/libscap/test/CMakeLists.txt index 33edceff0e..1b45528a9c 100644 --- a/userspace/libscap/test/CMakeLists.txt +++ b/userspace/libscap/test/CMakeLists.txt @@ -22,6 +22,12 @@ set(LIBSCAP_UNIT_TESTS_SOURCES scap_event.ut.cpp ) +# Modern BPF is supported only on kernel versions >= 5.8. +# To compile these tests you need to use the Cmake option `BUILD_LIBSCAP_MODERN_BPF=On` +if(BUILD_LIBSCAP_MODERN_BPF) + list(APPEND LIBSCAP_UNIT_TESTS_SOURCES ./test_suites/engines/modern_bpf/modern_bpf.cpp) +endif() + if (BUILD_LIBSCAP_GVISOR) list(APPEND LIBSCAP_UNIT_TESTS_SOURCES scap_gvisor_parsers.ut.cpp) include_directories(../engine/gvisor) diff --git a/userspace/libscap/test/README.md b/userspace/libscap/test/README.md new file mode 100644 index 0000000000..e31b737fae --- /dev/null +++ b/userspace/libscap/test/README.md @@ -0,0 +1,20 @@ +# Scap tests + +## Compile tests + +```bash +cmake -DUSE_BUNDLED_DEPS=On -DBUILD_BPF=True -DCREATE_TEST_TARGETS=On -DBUILD_LIBSCAP_GVISOR=Off .. 
+make unit-test-libscap +``` + +You can add tests for specific engines using their Cmake options: +- `-DBUILD_LIBSCAP_MODERN_BPF=On` +- `-BUILD_LIBSCAP_GVISOR=On` + +## Run tests + +From the build directory: + +```bash +sudo ./libscap/test/unit-test-libscap +``` \ No newline at end of file diff --git a/userspace/libscap/test/test_suites/engines/modern_bpf/modern_bpf.cpp b/userspace/libscap/test/test_suites/engines/modern_bpf/modern_bpf.cpp new file mode 100644 index 0000000000..5683bc4f88 --- /dev/null +++ b/userspace/libscap/test/test_suites/engines/modern_bpf/modern_bpf.cpp @@ -0,0 +1,386 @@ +#include "scap.h" +#include +#include +#include + +/* We are supposing that if we overcome this threshold, all buffers are full */ +#define MAX_ITERATIONS 300 + +scap_t* open_modern_bpf_engine(char* error_buf, int32_t* rc, unsigned long buffer_dim, uint16_t cpus_for_each_buffer, bool online_only, std::unordered_set tp_set = {}, std::unordered_set ppm_sc_set = {}) +{ + struct scap_open_args oargs = { + .engine_name = MODERN_BPF_ENGINE, + .mode = SCAP_MODE_LIVE, + }; + + /* If empty we fill with all tracepoints */ + if(tp_set.empty()) + { + for(int i = 0; i < TP_VAL_MAX; i++) + { + oargs.tp_of_interest.tp[i] = 1; + } + } + else + { + for(auto tp : tp_set) + { + oargs.tp_of_interest.tp[tp] = 1; + } + } + + /* If empty we fill with all syscalls */ + if(ppm_sc_set.empty()) + { + for(int i = 0; i < PPM_SC_MAX; i++) + { + oargs.ppm_sc_of_interest.ppm_sc[i] = 1; + } + } + else + { + for(auto ppm_sc : ppm_sc_set) + { + oargs.ppm_sc_of_interest.ppm_sc[ppm_sc] = 1; + } + } + + struct scap_modern_bpf_engine_params modern_bpf_params = { + .cpus_for_each_buffer = cpus_for_each_buffer, + .allocate_online_only = online_only, + .buffer_bytes_dim = buffer_dim, + }; + oargs.engine_params = &modern_bpf_params; + + return scap_open(&oargs, error_buf, rc); +} + +void check_event_is_not_overwritten(scap_t* h) +{ + /* Start the capture */ + ASSERT_EQ(scap_start_capture(h), SCAP_SUCCESS) << "unable to start the capture: " << scap_getlasterr(h) << std::endl; + + /* When the number of events is fixed for `MAX_ITERATIONS` we consider all the buffers full, this is just an approximation */ + scap_stats stats = {}; + uint64_t last_num_events = 0; + uint16_t iterations = 0; + + while(iterations < MAX_ITERATIONS || stats.n_drops == 0) + { + ASSERT_EQ(scap_get_stats(h, &stats), SCAP_SUCCESS) << "unable to get stats: " << scap_getlasterr(h) << std::endl; + if(last_num_events == (stats.n_evts - stats.n_drops)) + { + iterations++; + } + else + { + iterations = 0; + last_num_events = (stats.n_evts - stats.n_drops); + } + } + + /* Stop the capture */ + ASSERT_EQ(scap_stop_capture(h), SCAP_SUCCESS) << "unable to stop the capture: " << scap_getlasterr(h) << std::endl; + + /* The idea here is to check if an event is overwritten while we still have a pointer to it. + * Again this is only an approximation, we don't know if new events will be written in the buffer + * under test... + * + * We call `scap_next` keeping the pointer to the event. + * An event pointer becomes invalid when we call another `scap_next`, but until that moment it should be valid! + */ + scap_evt* evt = NULL; + uint16_t buffer_id; + + /* The first 'scap_next` could return a `SCAP_TIMEOUT` according to the chosen `buffer_mode` so we ignore it. 
*/ + scap_next(h, &evt, &buffer_id); + + ASSERT_EQ(scap_next(h, &evt, &buffer_id), SCAP_SUCCESS) << "unable to get an event with `scap_next`: " << scap_getlasterr(h) << std::endl; + + last_num_events = 0; + iterations = 0; + + /* We save some event info to check if they are still valid after some new events */ + uint64_t prev_ts = evt->ts; + uint64_t prev_tid = evt->tid; + uint32_t prev_len = evt->len; + uint16_t prev_type = evt->type; + uint32_t prev_nparams = evt->nparams; + + /* Start again the capture */ + ASSERT_EQ(scap_start_capture(h), SCAP_SUCCESS) << "unable to restart the capture: " << scap_getlasterr(h) << std::endl; + + /* We use the same approximation as before */ + while(iterations < MAX_ITERATIONS) + { + ASSERT_EQ(scap_get_stats(h, &stats), SCAP_SUCCESS) << "unable to get stats: " << scap_getlasterr(h) << std::endl; + if(last_num_events == (stats.n_evts - stats.n_drops)) + { + iterations++; + } + else + { + iterations = 0; + last_num_events = (stats.n_evts - stats.n_drops); + } + } + + /* We check if the previously collected event is still valid */ + ASSERT_EQ(prev_ts, evt->ts) << "different timestamp" << std::endl; + ASSERT_EQ(prev_tid, evt->tid) << "different thread id" << std::endl; + ASSERT_EQ(prev_len, evt->len) << "different event len" << std::endl; + ASSERT_EQ(prev_type, evt->type) << "different event type" << std::endl; + ASSERT_EQ(prev_nparams, evt->nparams) << "different num params" << std::endl; +} + +TEST(modern_bpf, open_engine) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* we want 1 ring buffer for each CPU */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, 1, true); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine: " << error_buffer << std::endl; + scap_close(h); +} + +TEST(modern_bpf, empty_buffer_dim) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 0, 1, true); + ASSERT_TRUE(!h || ret != SCAP_SUCCESS) << "the buffer dimension is 0, we should fail: " << error_buffer << std::endl; + /* In case of failure the `scap_close(h)` is already called in the vtable `init` method */ +} + +TEST(modern_bpf, wrong_buffer_dim) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* ring buffer dim is not a multiple of PAGE_SIZE */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 + 4 * 4096, 1, true); + ASSERT_TRUE(!h || ret != SCAP_SUCCESS) << "the buffer dimension is not a multiple of the page size, we should fail: " << error_buffer << std::endl; +} + +TEST(modern_bpf, not_enough_possible_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + + ssize_t num_possible_CPUs = sysconf(_SC_NPROCESSORS_CONF); + + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, num_possible_CPUs + 1, false); + ASSERT_TRUE(!h || ret != SCAP_SUCCESS) << "the CPUs required for each ring buffer are greater than the system possible CPUs, we should fail: " << error_buffer << std::endl; +} + +TEST(modern_bpf, not_enough_online_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + + ssize_t num_online_CPUs = sysconf(_SC_NPROCESSORS_ONLN); + + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, num_online_CPUs + 1, true); + ASSERT_TRUE(!h || ret != SCAP_SUCCESS) << "the CPUs required for each ring buffer are greater than the system online CPUs, we should fail: " << error_buffer << std::endl; +} + +TEST(modern_bpf, one_buffer_per_possible_CPU) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + 
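+	/* we want 1 ring buffer for every possible CPU (online and not), since `allocate_online_only` is false */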
scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, 1, false); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one ring buffer per CPU: " << error_buffer << std::endl; + + ssize_t num_possible_CPUs = sysconf(_SC_NPROCESSORS_CONF); + uint32_t num_expected_rings = scap_get_ndevs(h); + ASSERT_EQ(num_expected_rings, num_possible_CPUs) << "we should have a ring buffer for every possible CPU!" << std::endl; + + check_event_is_not_overwritten(h); + scap_close(h); +} + +TEST(modern_bpf, one_buffer_every_two_possible_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, 2, false); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one ring buffer every 2 CPUs: " << error_buffer << std::endl; + + ssize_t num_possible_CPUs = sysconf(_SC_NPROCESSORS_CONF); + uint32_t num_expected_rings = num_possible_CPUs / 2; + if(num_possible_CPUs % 2 != 0) + { + num_expected_rings++; + } + uint32_t num_rings = scap_get_ndevs(h); + ASSERT_EQ(num_rings, num_expected_rings) << "we should have one ring buffer every 2 CPUs!" << std::endl; + + check_event_is_not_overwritten(h); + scap_close(h); +} + +TEST(modern_bpf, one_buffer_shared_between_all_possible_CPUs_with_special_value) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* `0` is a special value that means one single shared ring buffer */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, 0, false); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one single shared ring buffer: " << error_buffer << std::endl; + + uint32_t num_rings = scap_get_ndevs(h); + ASSERT_EQ(num_rings, 1) << "we should have only one ring buffer shared between all CPUs!" << std::endl; + + check_event_is_not_overwritten(h); + scap_close(h); +} + +/* In this test we don't need to check for buffer corruption with `check_event_is_not_overwritten` + * we have already done it in the previous test `one_buffer_shared_between_all_CPUs_with_special_value`. + */ +TEST(modern_bpf, one_buffer_shared_between_all_online_CPUs_with_explicit_CPUs_number) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + + ssize_t num_possible_CPUs = sysconf(_SC_NPROCESSORS_ONLN); + + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, num_possible_CPUs, true); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one single shared ring buffer: " << error_buffer << std::endl; + + uint32_t num_rings = scap_get_ndevs(h); + ASSERT_EQ(num_rings, 1) << "we should have only one ring buffer shared between all CPUs!" 
<< std::endl; + + scap_close(h); +} + +#if defined(__NR_close) && defined(__NR_openat) && defined(__NR_listen) && defined(__NR_accept4) && defined(__NR_getegid) && defined(__NR_getgid) && defined(__NR_geteuid) && defined(__NR_getuid) && defined(__NR_bind) && defined(__NR_connect) && defined(__NR_sendto) && defined(__NR_sendmsg) && defined(__NR_recvmsg) && defined(__NR_recvfrom) && defined(__NR_socket) && defined(__NR_socketpair) + +/* Number of events we want to assert */ +#define EVENTS_TO_ASSERT 32 + +void check_event_order(scap_t* h) +{ + uint32_t events_to_assert[EVENTS_TO_ASSERT] = {PPME_SYSCALL_CLOSE_E, PPME_SYSCALL_CLOSE_X, PPME_SYSCALL_OPENAT_2_E, PPME_SYSCALL_OPENAT_2_X, PPME_SOCKET_LISTEN_E, PPME_SOCKET_LISTEN_X, PPME_SOCKET_ACCEPT4_5_E, PPME_SOCKET_ACCEPT4_5_X, PPME_SYSCALL_GETEGID_E, PPME_SYSCALL_GETEGID_X, PPME_SYSCALL_GETGID_E, PPME_SYSCALL_GETGID_X, PPME_SYSCALL_GETEUID_E, PPME_SYSCALL_GETEUID_X, PPME_SYSCALL_GETUID_E, PPME_SYSCALL_GETUID_X, PPME_SOCKET_BIND_E, PPME_SOCKET_BIND_X, PPME_SOCKET_CONNECT_E, PPME_SOCKET_CONNECT_X, PPME_SOCKET_SENDTO_E, PPME_SOCKET_SENDTO_X, PPME_SOCKET_SENDMSG_E, PPME_SOCKET_SENDMSG_X, PPME_SOCKET_RECVMSG_E, PPME_SOCKET_RECVMSG_X, PPME_SOCKET_RECVFROM_E, PPME_SOCKET_RECVFROM_X, PPME_SOCKET_SOCKET_E, PPME_SOCKET_SOCKET_X, PPME_SOCKET_SOCKETPAIR_E, PPME_SOCKET_SOCKETPAIR_X}; + + /* Start the capture */ + ASSERT_EQ(scap_start_capture(h), SCAP_SUCCESS) << "unable to start the capture: " << scap_getlasterr(h) << std::endl; + + /* 1. Generate a `close` event pair */ + syscall(__NR_close, -1); + + /* 2. Generate an `openat` event pair */ + syscall(__NR_openat, 0, "/**mock_path**/", 0, 0); + + /* 3. Generate a `listen` event pair */ + syscall(__NR_listen, -1, -1); + + /* 4. Generate an `accept4` event pair */ + syscall(__NR_accept4, -1, NULL, NULL, 0); + + /* 5. Generate a `getegid` event pair */ + syscall(__NR_getegid); + + /* 6. Generate a `getgid` event pair */ + syscall(__NR_getgid); + + /* 7. Generate a `geteuid` event pair */ + syscall(__NR_geteuid); + + /* 8. Generate a `getuid` event pair */ + syscall(__NR_getuid); + + /* 9. Generate a `bind` event pair */ + syscall(__NR_bind, -1, NULL, 0); + + /* 10. Generate a `connect` event pair */ + syscall(__NR_connect, -1, NULL, 0); + + /* 11. Generate a `sendto` event pair */ + syscall(__NR_sendto, -1, NULL, 0, 0, NULL, 0); + + /* 12. Generate a `sendmsg` event pair */ + syscall(__NR_sendmsg, -1, NULL, 0); + + /* 13. Generate a `recvmsg` event pair */ + syscall(__NR_recvmsg, -1, NULL, 0); + + /* 14. Generate a `recvmsg` event pair */ + syscall(__NR_recvfrom, -1, NULL, 0, 0, NULL, 0); + + /* 15. Generate a `socket` event pair */ + syscall(__NR_socket, 0, 0, 0); + + /* 16. 
Generate a `socketpair` event pair */ + syscall(__NR_socketpair, 0, 0, 0, 0); + + /* Stop the capture */ + ASSERT_EQ(scap_stop_capture(h), SCAP_SUCCESS) << "unable to stop the capture: " << scap_getlasterr(h) << std::endl; + + scap_evt* evt = NULL; + uint16_t buffer_id = 0; + int ret = 0; + uint64_t acutal_pid = getpid(); + /* if we hit 5 consecutive timeouts it means that all buffers are empty (approximation) */ + uint16_t timeouts = 0; + + for(int i = 0; i < EVENTS_TO_ASSERT; i++) + { + while(true) + { + ret = scap_next(h, &evt, &buffer_id); + if(ret == SCAP_SUCCESS) + { + timeouts = 0; + if(evt->tid == acutal_pid && evt->type == events_to_assert[i]) + { + /* We found our event */ + break; + } + } + else if(ret == SCAP_TIMEOUT) + { + timeouts++; + if(timeouts == 5) + { + FAIL() << "we didn't find event '" << events_to_assert[i] << "' at position '" << i << "'" << std::endl; + } + } + } + } +} + +TEST(modern_bpf, read_in_order_one_buffer_per_online_CPU) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* We use buffers of 1 MB to be sure that we don't have drops */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 * 1024 * 1024, 1, true); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one ring buffer per CPU: " << error_buffer << std::endl; + + check_event_order(h); + scap_close(h); +} + +TEST(modern_bpf, read_in_order_one_buffer_every_two_online_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* We use buffers of 1 MB to be sure that we don't have drops */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 * 1024 * 1024, 2, true); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one ring buffer every 2 CPUs: " << error_buffer << std::endl; + + check_event_order(h); + scap_close(h); +} + +TEST(modern_bpf, read_in_order_one_buffer_shared_between_all_possible_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* We use buffers of 1 MB to be sure that we don't have drops */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 * 1024 * 1024, 0, false); + ASSERT_EQ(!h || ret != SCAP_SUCCESS, false) << "unable to open modern bpf engine with one single shared ring buffer: " << error_buffer << std::endl; + + check_event_order(h); + scap_close(h); +} +#endif From c3a5c47706fd4546b2a7411f094b59c623f79682 Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 12:39:36 +0100 Subject: [PATCH 6/6] update: set online_only as default in scap-open Signed-off-by: Andrea Terzolo Co-authored-by: Hendrik Brueckner --- userspace/libscap/examples/01-open/scap_open.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/userspace/libscap/examples/01-open/scap_open.c b/userspace/libscap/examples/01-open/scap_open.c index 6cfa998916..c4813e7a4c 100644 --- a/userspace/libscap/examples/01-open/scap_open.c +++ b/userspace/libscap/examples/01-open/scap_open.c @@ -40,7 +40,7 @@ limitations under the License. #define BUFFER_OPTION "--buffer_dim" #define SIMPLE_SET_OPTION "--simple_set" #define CPUS_FOR_EACH_BUFFER_MODE "--cpus_for_buf" -#define ALLOCATE_ONLINE_ONLY_MODE "--online_only" +#define ALL_AVAILABLE_CPUS_MODE "--available_cpus" /* PRINT */ #define VALIDATION_OPTION "--validate_syscalls" @@ -711,7 +711,9 @@ void print_help() printf("'%s ': number of events to catch before terminating. (default: UINT64_MAX)\n", NUM_EVENTS_OPTION); printf("'%s ': every event of this type will be printed to console. 
(default: -1, no print)\n", EVENT_TYPE_OPTION); printf("'%s ': dimension in bytes of a single per CPU buffer.\n", BUFFER_OPTION); + printf("[MODERN PROBE ONLY, EXPERIMENTAL]\n"); printf("'%s ': allocate a ring buffer for every `cpus_for_each_buffer` CPUs.\n", CPUS_FOR_EACH_BUFFER_MODE); + printf("'%s': allocate ring buffers for all available CPUs. Default: allocate ring buffers for online CPUs only.\n", ALL_AVAILABLE_CPUS_MODE); printf("\n------> VALIDATION OPTIONS\n"); printf("'%s': validation checks.\n", VALIDATION_OPTION); printf("\n------> PRINT OPTIONS\n"); @@ -820,7 +822,7 @@ void parse_CLI_options(int argc, char** argv) oargs.mode = SCAP_MODE_LIVE; modern_bpf_params.buffer_bytes_dim = buffer_bytes_dim; modern_bpf_params.cpus_for_each_buffer = DEFAULT_CPU_FOR_EACH_BUFFER; - modern_bpf_params.allocate_online_only = false; + modern_bpf_params.allocate_online_only = true; oargs.engine_params = &modern_bpf_params; } if(!strcmp(argv[i], SCAP_FILE_OPTION)) @@ -905,9 +907,9 @@ void parse_CLI_options(int argc, char** argv) modern_bpf_params.cpus_for_each_buffer = atoi(argv[++i]); } /* This should be used only with the modern probe */ - if(!strcmp(argv[i], ALLOCATE_ONLINE_ONLY_MODE)) + if(!strcmp(argv[i], ALL_AVAILABLE_CPUS_MODE)) { - modern_bpf_params.allocate_online_only = true; + modern_bpf_params.allocate_online_only = false; }
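
A minimal usage sketch (not part of the patch series) of the two knobs introduced here, `cpus_for_each_buffer` and `allocate_online_only`, modeled on the `scap_open()` calls in the new test suite. The 8 MiB buffer size, the error handling style, and the `main()` wrapper are illustrative assumptions, not values mandated by the patches.

```c
#include <stdio.h>
#include <stdint.h>
#include "scap.h"

int main(void)
{
	char error[SCAP_LASTERR_SIZE];
	int32_t rc = 0;

	struct scap_open_args oargs = {
		.engine_name = MODERN_BPF_ENGINE,
		.mode = SCAP_MODE_LIVE,
	};

	/* Capture every tracepoint and syscall, as the new test suite does. */
	for(int i = 0; i < TP_VAL_MAX; i++)
	{
		oargs.tp_of_interest.tp[i] = 1;
	}
	for(int i = 0; i < PPM_SC_MAX; i++)
	{
		oargs.ppm_sc_of_interest.ppm_sc[i] = 1;
	}

	struct scap_modern_bpf_engine_params params = {
		.cpus_for_each_buffer = 2,    /* one ring buffer every 2 CPUs */
		.allocate_online_only = true, /* consider only online CPUs */
		.buffer_bytes_dim = 8 * 1024 * 1024,
	};
	oargs.engine_params = &params;

	scap_t* h = scap_open(&oargs, error, &rc);
	if(h == NULL || rc != SCAP_SUCCESS)
	{
		fprintf(stderr, "unable to open the modern BPF engine: %s\n", error);
		return 1;
	}

	/* With these patches `scap_get_ndevs()` reports the number of allocated
	 * ring buffers, no longer the number of system CPUs.
	 */
	printf("allocated ring buffers: %u\n", scap_get_ndevs(h));

	scap_close(h);
	return 0;
}
```

With `--cpus_for_buf 2` the `scap-open` example drives the same configuration from the command line, and `--available_cpus` switches back to allocating buffers for all possible CPUs instead of only the online ones.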