From 9447da55da69dd5b8e3773adc96b2cdea1ca7a2e Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Mon, 9 Jan 2023 22:04:59 +0100 Subject: [PATCH 1/6] fix: correctly free the state in modern bpf probe Signed-off-by: Andrea Terzolo --- userspace/libpman/src/capture.c | 7 ++++++- userspace/libpman/src/lifecycle.c | 9 +++++---- userspace/libpman/src/state.c | 13 +++++++++---- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/userspace/libpman/src/capture.c b/userspace/libpman/src/capture.c index c89fec7820..c4bd3ece98 100644 --- a/userspace/libpman/src/capture.c +++ b/userspace/libpman/src/capture.c @@ -40,7 +40,12 @@ int pman_enable_capture(bool *tp_set) int pman_disable_capture() { - return pman_detach_all_programs(); + /* If we fail at initialization time the BPF skeleton is not initialized */ + if(g_state.skel) + { + return pman_detach_all_programs(); + } + return 0; } #ifdef TEST_HELPERS diff --git a/userspace/libpman/src/lifecycle.c b/userspace/libpman/src/lifecycle.c index 1bc35becfa..0025cc24ab 100644 --- a/userspace/libpman/src/lifecycle.c +++ b/userspace/libpman/src/lifecycle.c @@ -40,22 +40,23 @@ int pman_load_probe() void pman_close_probe() { - if(!g_state.cons_pos) + if(g_state.cons_pos) { free(g_state.cons_pos); } - if(!g_state.prod_pos) + if(g_state.prod_pos) { free(g_state.prod_pos); } - if(!g_state.skel) + if(g_state.skel) { + bpf_probe__detach(g_state.skel); bpf_probe__destroy(g_state.skel); } - if(!g_state.rb_manager) + if(g_state.rb_manager) { ring_buffer__free(g_state.rb_manager); } diff --git a/userspace/libpman/src/state.c b/userspace/libpman/src/state.c index bc338fc887..3f317a3354 100644 --- a/userspace/libpman/src/state.c +++ b/userspace/libpman/src/state.c @@ -22,16 +22,21 @@ limitations under the License. #include #include "state.h" -struct internal_state g_state; +struct internal_state g_state = {}; void pman_print_error(const char* error_message) { if(!error_message) { - fprintf(stderr, "libpman: No specific message available (errno: %d | message: %s)\n", errno, strerror(errno)); + return; } - else + + if(errno != 0) { fprintf(stderr, "libpman: %s (errno: %d | message: %s)\n", error_message, errno, strerror(errno)); } -} \ No newline at end of file + else + { + fprintf(stderr, "libpman: %s\n", error_message); + } +} From 7bcd0f9d9b95c36a55e3a33051cc66009df24f1a Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 10:22:57 +0100 Subject: [PATCH 2/6] new: support multiple CPUs per buffer Signed-off-by: Andrea Terzolo --- driver/modern_bpf/maps/maps.h | 11 +- userspace/libpman/include/libpman.h | 29 ++- userspace/libpman/src/capture.c | 12 +- userspace/libpman/src/configuration.c | 81 ++++++- userspace/libpman/src/maps.c | 6 +- userspace/libpman/src/ringbuffer.c | 210 ++++++++++-------- userspace/libpman/src/state.h | 10 +- .../engine/modern_bpf/modern_bpf_public.h | 5 +- .../engine/modern_bpf/scap_modern_bpf.c | 24 +- .../engine/modern_bpf/scap_modern_bpf.h | 7 +- 10 files changed, 260 insertions(+), 135 deletions(-) diff --git a/driver/modern_bpf/maps/maps.h b/driver/modern_bpf/maps/maps.h index 4b6352e417..5f8ac193e2 100644 --- a/driver/modern_bpf/maps/maps.h +++ b/driver/modern_bpf/maps/maps.h @@ -139,7 +139,7 @@ struct /** * @brief For every CPU on the system we have a counter - * map where we store the number of events correcty pushed + * map where we store the number of events correctly pushed * and the number of events dropped. 
*/ struct @@ -154,9 +154,7 @@ struct /*=============================== RINGBUF MAP ===============================*/ /** - * @brief We will have a ringbuf map for every CPU on the system. - * The dimension of the single ringbuf and the number of - * ringbuf maps are set in userspace. + * @brief We use this map to let the verifier understand the content of our array of maps (`ringbuf_maps`) */ struct ringbuf_map { @@ -164,8 +162,9 @@ struct ringbuf_map }; /** - * @brief This array of maps will contain a ringbuf map for every CPU - * on the system. + * @brief This array of maps will contain a variable number of ring buffers + * according to the user-provided configuration. It could also contain only + * one buffer shared between all CPUs. */ struct { diff --git a/userspace/libpman/include/libpman.h b/userspace/libpman/include/libpman.h index 36d94dc6c8..0351bced55 100644 --- a/userspace/libpman/include/libpman.h +++ b/userspace/libpman/include/libpman.h @@ -26,7 +26,7 @@ extern "C" #endif /* `libpman` return values convention: - * In case of success `0` is return otherwise `errno`. If `errno` is not + * In case of success `0` is returned otherwise `errno`. If `errno` is not * available `-1` is returned. * * Please Note: @@ -47,17 +47,24 @@ extern "C" * * @param verbosity use `true` if you want to activate libbpf verbosity. * @param buf_bytes_dim dimension of a single per-CPU buffer in bytes. + * @param cpus_for_each_buffer number of CPUs to which we want to associate a ring buffer. + * @param allocate_online_only if true, allocate ring buffers taking only into account online CPUs. * @return `0` on success, `-1` in case of error. */ - int pman_init_state(bool verbosity, unsigned long buf_bytes_dim); + int pman_init_state(bool verbosity, unsigned long buf_bytes_dim, uint16_t cpus_for_each_buffer, bool allocate_online_only); /** - * @brief Return the number of available CPUs on the system, not the - * online CPUs! + * @brief Clear the `libpman` global state before it is used. + * This API could be useful if we open the modern bpf engine multiple times. + */ + void pman_clear_state(void); + + /** + * @brief Return the number of allocated ring buffers. * - * @return number of available CPUs on success, `-1` in case of error. + * @return number of allocated ring buffers. */ - int pman_get_cpus_number(void); + int pman_get_required_buffers(void); ///////////////////////////// // PROBE LIFECYCLE @@ -225,10 +232,10 @@ extern "C" * * @param event_ptr in case of success return a pointer * to the event, otherwise return NULL. - * @param cpu_id in case of success returns the id of the CPU - * on which we have found the event, otherwise return `-1`. + * @param buffer_id in case of success returns the id of the ring buffer + * from which we retrieved the event, otherwise return `-1`. */ - void pman_consume_first_from_buffers(void** event_ptr, int16_t *cpu_id); + void pman_consume_first_event(void** event_ptr, int16_t* buffer_id); ///////////////////////////// // CAPTURE (EXCHANGE VALUES WITH BPF SIDE) @@ -413,7 +420,7 @@ extern "C" * @brief Return `true` if all ring buffers are full. 
To state * that a ring buffer is full we check that the free space is less * than the `threshold` - * + * * @param threshold used to check if a buffer is full * @return `true` if all buffers are full, otherwise `false` */ @@ -421,7 +428,7 @@ extern "C" /** * @brief Get the producer pos for the required ring - * + * * @param ring_num ring for which we want to obtain the producer pos * @return producer pos as an unsigned long */ diff --git a/userspace/libpman/src/capture.c b/userspace/libpman/src/capture.c index c4bd3ece98..92d06d485a 100644 --- a/userspace/libpman/src/capture.c +++ b/userspace/libpman/src/capture.c @@ -66,7 +66,7 @@ int pman_print_stats() return errno; } - for(int index = 0; index < g_state.n_cpus; index++) + for(int index = 0; index < g_state.n_possible_cpus; index++) { if(bpf_map_lookup_elem(counter_maps_fd, &index, &cnt_map) < 0) { @@ -114,7 +114,10 @@ int pman_get_scap_stats(void *scap_stats_struct) * - stats->n_preemptions */ - for(int index = 0; index < g_state.n_cpus; index++) + /* We always take statistics from all the CPUs, even if some of them are not online. + * If the CPU is not online the counter map will be empty. + */ + for(int index = 0; index < g_state.n_possible_cpus; index++) { if(bpf_map_lookup_elem(counter_maps_fd, &index, &cnt_map) < 0) { @@ -146,7 +149,10 @@ int pman_get_n_tracepoint_hit(long *n_events_per_cpu) return errno; } - for(int index = 0; index < g_state.n_cpus; index++) + /* We always take statistics from all the CPUs, even if some of them are not online. + * If the CPU is not online the counter map will be empty. + */ + for(int index = 0; index < g_state.n_possible_cpus; index++) { if(bpf_map_lookup_elem(counter_maps_fd, &index, &cnt_map) < 0) { diff --git a/userspace/libpman/src/configuration.c b/userspace/libpman/src/configuration.c index 86829dacac..d4e649bbd8 100644 --- a/userspace/libpman/src/configuration.c +++ b/userspace/libpman/src/configuration.c @@ -44,8 +44,27 @@ static void setup_libbpf_logging(bool verbosity) } } -int pman_init_state(bool verbosity, unsigned long buf_bytes_dim) +void pman_clear_state() { + g_state.skel = NULL; + g_state.rb_manager = NULL; + g_state.n_possible_cpus = 0; + g_state.n_interesting_cpus = 0; + g_state.allocate_online_only = false; + g_state.n_required_buffers = 0; + g_state.cpus_for_each_buffer = 0; + g_state.ringbuf_pos = 0; + g_state.cons_pos = NULL; + g_state.prod_pos = NULL; + g_state.inner_ringbuf_map_fd = 0; + g_state.buffer_bytes_dim = 0; + g_state.last_ring_read = -1; + g_state.last_event_size = 0; +} + +int pman_init_state(bool verbosity, unsigned long buf_bytes_dim, uint16_t cpus_for_each_buffer, bool allocate_online_only) +{ + char error_message[MAX_ERROR_MESSAGE_LEN]; /* `LIBBPF_STRICT_ALL` turns on all supported strict features * of libbpf to simulate libbpf v1.0 behavior. @@ -57,14 +76,64 @@ int pman_init_state(bool verbosity, unsigned long buf_bytes_dim) setup_libbpf_logging(verbosity); /* Set the available number of CPUs inside the internal state. */ - g_state.n_cpus = libbpf_num_possible_cpus(); - if(g_state.n_cpus <= 0) + g_state.n_possible_cpus = libbpf_num_possible_cpus(); + if(g_state.n_possible_cpus <= 0) { pman_print_error("no available cpus"); return -1; } - /* Set the dimension of a single per-CPU ring buffer. 
*/ + g_state.allocate_online_only = allocate_online_only; + + if(g_state.allocate_online_only) + { + ssize_t online_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if(online_cpus != -1) + { + /* We will allocate buffers only for online CPUs */ + g_state.n_interesting_cpus = online_cpus; + } + else + { + /* Fallback to all available CPU even if the `allocate_online_only` flag is set to `true` */ + g_state.n_interesting_cpus = g_state.n_possible_cpus; + } + } + else + { + /* We will allocate buffers only for all available CPUs */ + g_state.n_interesting_cpus = g_state.n_possible_cpus; + } + + /* We are requiring a buffer every `cpus_for_each_buffer` CPUs, + * but `cpus_for_each_buffer` is greater than our possible CPU number! + */ + if(cpus_for_each_buffer > g_state.n_interesting_cpus) + { + snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "we are requiring a buffer every '%d' CPUs, but '%d' is greater than our interesting CPU number (%d)!", cpus_for_each_buffer, cpus_for_each_buffer, g_state.n_interesting_cpus); + pman_print_error((const char*)error_message); + return -1; + } + + /* `0` is a special value that means a single ring buffer shared between all the CPUs */ + if(cpus_for_each_buffer == 0) + { + /* We want a single ring buffer so 1 ring buffer for all the interesting CPUs we have */ + g_state.cpus_for_each_buffer = g_state.n_interesting_cpus; + } + else + { + g_state.cpus_for_each_buffer = cpus_for_each_buffer; + } + + /* Set the number of ring buffers we need */ + g_state.n_required_buffers = g_state.n_interesting_cpus / g_state.cpus_for_each_buffer; + /* If we have some remaining CPUs it means that we need another buffer */ + if((g_state.n_interesting_cpus % g_state.cpus_for_each_buffer) != 0) + { + g_state.n_required_buffers++; + } + /* Set the dimension of a single ring buffer */ g_state.buffer_bytes_dim = buf_bytes_dim; /* These will be used during the ring buffer consumption phase. */ @@ -73,7 +142,7 @@ int pman_init_state(bool verbosity, unsigned long buf_bytes_dim) return 0; } -int pman_get_cpus_number() +int pman_get_required_buffers() { - return g_state.n_cpus; + return g_state.n_required_buffers; } diff --git a/userspace/libpman/src/maps.c b/userspace/libpman/src/maps.c index b88d8d09bc..ad558916a4 100644 --- a/userspace/libpman/src/maps.c +++ b/userspace/libpman/src/maps.c @@ -244,7 +244,8 @@ int pman_fill_extra_event_prog_tail_table() static int size_auxiliary_maps() { - if(bpf_map__set_max_entries(g_state.skel->maps.auxiliary_maps, g_state.n_cpus)) + /* We always allocate auxiliary maps from all the CPUs, even if some of them are not online. */ + if(bpf_map__set_max_entries(g_state.skel->maps.auxiliary_maps, g_state.n_possible_cpus)) { pman_print_error("unable to set max entries for 'auxiliary_maps'"); return errno; @@ -254,7 +255,8 @@ static int size_auxiliary_maps() static int size_counter_maps() { - if(bpf_map__set_max_entries(g_state.skel->maps.counter_maps, g_state.n_cpus)) + /* We always allocate counter maps from all the CPUs, even if some of them are not online. */ + if(bpf_map__set_max_entries(g_state.skel->maps.counter_maps, g_state.n_possible_cpus)) { pman_print_error(" unable to set max entries for 'counter_maps'"); return errno; diff --git a/userspace/libpman/src/ringbuffer.c b/userspace/libpman/src/ringbuffer.c index 1cdd3e4b98..3870c6304b 100644 --- a/userspace/libpman/src/ringbuffer.c +++ b/userspace/libpman/src/ringbuffer.c @@ -26,6 +26,8 @@ limitations under the License. 
#include "ringbuffer_definitions.h" +/* Utility functions object loading */ + /* This must be done to please the verifier! At load-time, the verifier must know the * size of a map inside the array. */ @@ -55,7 +57,11 @@ static int ringbuf_array_set_inner_map() static int ringbuf_array_set_max_entries() { - if(bpf_map__set_max_entries(g_state.skel->maps.ringbuf_maps, g_state.n_cpus)) + /* We always allocate a number of entries equal to the available CPUs. + * This doesn't mean that we allocate a ring buffer for every available CPU, + * it means only that every CPU will have an associated entry. + */ + if(bpf_map__set_max_entries(g_state.skel->maps.ringbuf_maps, g_state.n_possible_cpus)) { pman_print_error("unable to set max entries for the ringbuf_array"); return errno; @@ -66,8 +72,8 @@ static int ringbuf_array_set_max_entries() static int allocate_consumer_producer_positions() { g_state.ringbuf_pos = 0; - g_state.cons_pos = (unsigned long *)calloc(g_state.n_cpus, sizeof(unsigned long)); - g_state.prod_pos = (unsigned long *)calloc(g_state.n_cpus, sizeof(unsigned long)); + g_state.cons_pos = (unsigned long *)calloc(g_state.n_required_buffers, sizeof(unsigned long)); + g_state.prod_pos = (unsigned long *)calloc(g_state.n_required_buffers, sizeof(unsigned long)); if(g_state.cons_pos == NULL || g_state.prod_pos == NULL) { pman_print_error("failed to alloc memory for cons_pos and prod_pos"); @@ -76,6 +82,7 @@ static int allocate_consumer_producer_positions() return 0; } +/* Before loading */ int pman_prepare_ringbuf_array_before_loading() { int err; @@ -86,122 +93,145 @@ int pman_prepare_ringbuf_array_before_loading() return err; } -static int create_first_ringbuffer_map() +static bool is_cpu_online(uint16_t cpu_id) { - int ringubuf_array_fd = -1; - int ringbuf_map_fd = -1; - int index = 0; - - /* We don't need anymore the inner map, close it. */ - close(g_state.inner_ringbuf_map_fd); - - /* `ringbuf_array` is a maps array, every map inside it is a `BPF_MAP_TYPE_RINGBUF`. */ - ringubuf_array_fd = bpf_map__fd(g_state.skel->maps.ringbuf_maps); - if(ringubuf_array_fd <= 0) + /* CPU 0 is always online */ + if(cpu_id == 0) { - pman_print_error("failed to get the ringubuf_array"); - return errno; + return true; } - /* create the first ringbuf map. */ - ringbuf_map_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, g_state.buffer_bytes_dim, NULL); - if(ringbuf_map_fd <= 0) + char filename[FILENAME_MAX]; + int online = 0; + snprintf(filename, sizeof(filename), "/sys/devices/system/cpu/cpu%d/online", cpu_id); + FILE *fp = fopen(filename, "r"); + if(fp == NULL) { - pman_print_error("failed to create the first ringbuf map"); - goto clean_create_first_ringbuffer_map; + /* When missing NUMA properties, CPUs do not expose online information. + * Fallback at considering them online if we can at least reach their folder. + * This is useful for example for raspPi devices. + * See: https://github.com/kubernetes/kubernetes/issues/95039 + */ + snprintf(filename, sizeof(filename), "/sys/devices/system/cpu/cpu%d/", cpu_id); + if(access(filename, F_OK) == 0) + { + return true; + } + else + { + return false; + } } - /* add the first ringbuf map into the array. 
*/ - if(bpf_map_update_elem(ringubuf_array_fd, &index, &ringbuf_map_fd, BPF_ANY)) + fscanf(fp, "%d", &online); + return online == 1; +} + +/* After loading */ +int pman_finalize_ringbuf_array_after_loading() +{ + int ringubuf_array_fd = -1; + char error_message[MAX_ERROR_MESSAGE_LEN]; + int *ringbufs_fds = (int *)calloc(g_state.n_required_buffers, sizeof(int)); + bool success = false; + + /* We don't need anymore the inner map, close it. */ + close(g_state.inner_ringbuf_map_fd); + + /* Create ring buffer maps. */ + for(int i = 0; i < g_state.n_required_buffers; i++) { - pman_print_error("failed to add the first ringbuf map into the array"); - goto clean_create_first_ringbuffer_map; + ringbufs_fds[i] = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, g_state.buffer_bytes_dim, NULL); + if(ringbufs_fds[i] <= 0) + { + snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to create the ringbuf map for CPU '%d'. (If you get memory allocation errors try to reduce the buffer dimension)", i); + pman_print_error((const char *)error_message); + goto clean_percpu_ring_buffers; + } } - g_state.rb_manager = ring_buffer__new(ringbuf_map_fd, NULL, NULL, NULL); + /* Create the ringbuf manager */ + g_state.rb_manager = ring_buffer__new(ringbufs_fds[0], NULL, NULL, NULL); if(!g_state.rb_manager) { - pman_print_error("failed to instantiate the ringbuf manager. (If you get memory allocation errors try to reduce the buffer dimension)"); - goto clean_create_first_ringbuffer_map; + pman_print_error("failed to instantiate the ringbuf manager."); + goto clean_percpu_ring_buffers; } - return 0; - -clean_create_first_ringbuffer_map: - close(ringbuf_map_fd); - close(ringubuf_array_fd); - return errno; -} - -static int create_remaining_ringbuffer_maps() -{ - int ringubuf_array_fd = -1; - int ringbuf_map_fd = -1; - char error_message[MAX_ERROR_MESSAGE_LEN]; - /* the first ringbuf map is already inserted into the array. - * See `create_first_ringbuffer_map()` function. + /* Add all remaining buffers into the manager. + * We start from 1 because the first one is + * used to instantiate the manager. */ - int index = 1; + for(int i = 1; i < g_state.n_required_buffers; i++) + { + if(ring_buffer__add(g_state.rb_manager, ringbufs_fds[i], NULL, NULL)) + { + snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to add the ringbuf map for CPU %d into the manager", i); + pman_print_error((const char *)error_message); + goto clean_percpu_ring_buffers; + } + } - /* get the ringbuf_array with a map already in it. */ + /* `ringbuf_array` is a maps array, every map inside it is a `BPF_MAP_TYPE_RINGBUF`. */ ringubuf_array_fd = bpf_map__fd(g_state.skel->maps.ringbuf_maps); if(ringubuf_array_fd <= 0) { - pman_print_error("failed to get a not empty ringubuf_array"); + pman_print_error("failed to get the ringubuf_array"); return errno; } - /* for all CPUs add the rinugbuf map into the array and add it also - * into the ringbuf manager. Please note: we have already initialized the - * the ringbuf_array and the manager with the map for the CPU `0`. 
- */ - for(index = 1; index < g_state.n_cpus; index++) + /* We need to associate every CPU to the right ring buffer */ + int ringbuf_id = 0; + int reached = 0; + for(int i = 0; i < g_state.n_possible_cpus; i++) { - ringbuf_map_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, g_state.buffer_bytes_dim, NULL); - if(ringbuf_map_fd <= 0) + /* If we want to allocate only buffers for online CPUs and the CPU is online, fill its + * ring buffer array entry, otherwise we can go on with the next online CPU + */ + if(g_state.allocate_online_only && !is_cpu_online(i)) { - snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to create the ringbuf map for CPU %d", index); - pman_print_error((const char *)error_message); - goto clean_create_remaining_ringbuffer_maps; + continue; } - if(bpf_map_update_elem(ringubuf_array_fd, &index, &ringbuf_map_fd, BPF_ANY)) + if(bpf_map_update_elem(ringubuf_array_fd, &i, &ringbufs_fds[ringbuf_id], BPF_ANY)) { - snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to add the ringbuf map for CPU %d into the array", index); + snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to add the ringbuf map for CPU '%d' to ringbuf '%d'", i, ringbuf_id); pman_print_error((const char *)error_message); - goto clean_create_remaining_ringbuffer_maps; + goto clean_percpu_ring_buffers; } - /* add the new ringbuf map into the manager. */ - if(ring_buffer__add(g_state.rb_manager, ringbuf_map_fd, NULL, NULL)) + if(++reached == g_state.cpus_for_each_buffer) { - snprintf(error_message, MAX_ERROR_MESSAGE_LEN, "failed to add the ringbuf map for CPU %d into the manager", index); - pman_print_error((const char *)error_message); - goto clean_create_remaining_ringbuffer_maps; + /* we need to switch to the next buffer */ + reached = 0; + ringbuf_id++; } } - return 0; + success = true; + +clean_percpu_ring_buffers: + for(int i = 0; i < g_state.n_required_buffers; i++) + { + if(ringbufs_fds[i]) + { + close(ringbufs_fds[i]); + } + } + free(ringbufs_fds); + + if(success) + { + return 0; + } -clean_create_remaining_ringbuffer_maps: - close(ringbuf_map_fd); close(ringubuf_array_fd); + if(g_state.rb_manager) + { + ring_buffer__free(g_state.rb_manager); + } return errno; } -/* Create all the ringbuffer maps inside the ringbuffer_array and assign - * them to the manager. Note, the first ringbuffer map is separated from - * the others because we first need to create the ringbuffer manager with - * just one map `ring_buffer__new`. After having instanciating the manager - * we can add to it all the other maps with `ring_buffer__add`. - */ -int pman_finalize_ringbuf_array_after_loading() -{ - int err; - err = create_first_ringbuffer_map(); - err = err ?: create_remaining_ringbuffer_maps(); - return err; -} - static inline void *ringbuf__get_first_ring_event(struct ring *r, int pos) { int *len_ptr = NULL; @@ -209,7 +239,7 @@ static inline void *ringbuf__get_first_ring_event(struct ring *r, int pos) void *sample = NULL; /* If the consumer reaches the producer update the producer position to - * get the newly collected events. + * get the newly collected events. 
*/ if(g_state.cons_pos[pos] >= g_state.prod_pos[pos]) { @@ -238,7 +268,7 @@ static inline void *ringbuf__get_first_ring_event(struct ring *r, int pos) return sample; } -static void ringbuf__consume_first_event(struct ring_buffer *rb, struct ppm_evt_hdr **event_ptr, int16_t *cpu_id) +static void ringbuf__consume_first_event(struct ring_buffer *rb, struct ppm_evt_hdr **event_ptr, int16_t *buffer_id) { uint64_t min_ts = 0xffffffffffffffffLL; struct ppm_evt_hdr *tmp_pointer = NULL; @@ -273,15 +303,15 @@ static void ringbuf__consume_first_event(struct ring_buffer *rb, struct ppm_evt_ } *event_ptr = tmp_pointer; - *cpu_id = tmp_ring; + *buffer_id = tmp_ring; g_state.last_ring_read = tmp_ring; g_state.last_event_size = tmp_cons_increment; } -/* This API must be used if we want to get the first event according to its timestamp */ -void pman_consume_first_from_buffers(void **event_ptr, int16_t *cpu_id) +/* Consume */ +void pman_consume_first_event(void **event_ptr, int16_t *buffer_id) { - ringbuf__consume_first_event(g_state.rb_manager, (struct ppm_evt_hdr **)event_ptr, cpu_id); + ringbuf__consume_first_event(g_state.rb_manager, (struct ppm_evt_hdr **)event_ptr, buffer_id); } #ifdef TEST_HELPERS @@ -292,7 +322,7 @@ void pman_consume_first_from_buffers(void **event_ptr, int16_t *cpu_id) */ static bool pman_is_ringbuffer_full(int ring_num, unsigned long threshold) { - if(ring_num < 0 || ring_num >= g_state.n_cpus) + if(ring_num < 0 || ring_num >= g_state.n_possible_cpus) { return -1; } @@ -317,7 +347,7 @@ bool pman_are_all_ringbuffers_full(unsigned long threshold) int attempt = 0; /* Performs 3 attempts just to be sure that all the buffers are empty. */ - while(pos < g_state.n_cpus) + while(pos < g_state.n_possible_cpus) { if(!pman_is_ringbuffer_full(pos, threshold)) { @@ -326,7 +356,7 @@ bool pman_are_all_ringbuffers_full(unsigned long threshold) pos++; - if(pos == g_state.n_cpus && attempt != 2) + if(pos == g_state.n_possible_cpus && attempt != 2) { printf("Stable, attempt %d\n", attempt); pos = 0; diff --git a/userspace/libpman/src/state.h b/userspace/libpman/src/state.h index f5f3612aaf..0dcae390ec 100644 --- a/userspace/libpman/src/state.h +++ b/userspace/libpman/src/state.h @@ -29,14 +29,18 @@ struct internal_state { struct bpf_probe* skel; /* bpf skeleton with all programs and maps. */ struct ring_buffer* rb_manager; /* ring_buffer manager with all per-CPU ringbufs. */ - int16_t n_cpus; /* number of system available CPUs. */ + int16_t n_possible_cpus; /* number of possible system CPUs (online and not). */ + int16_t n_interesting_cpus; /* according to userspace configuration we can consider only online CPUs or all available CPUs. */ + bool allocate_online_only; /* If true we allocate ring buffers only for online CPUs */ + uint32_t n_required_buffers; /* number of ring buffers we need to allocate */ + uint16_t cpus_for_each_buffer; /* Users want a ring buffer every `cpus_for_each_buffer` CPUs */ int ringbuf_pos; /* actual ringbuf we are considering. */ unsigned long* cons_pos; /* every ringbuf has a consumer position. */ unsigned long* prod_pos; /* every ringbuf has a producer position. */ int32_t inner_ringbuf_map_fd; /* inner map used to configure the ringbuf array before loading phase. */ unsigned long buffer_bytes_dim; /* dimension of a single per-CPU ringbuffer in bytes. */ - int last_ring_read; /* Last ring from which we have correctly read an event. Could be `-1` if there were no successful reads. */ - unsigned long last_event_size; /* Last event correctly read. 
Could be `0` if there were no successful reads. */ + int last_ring_read; /* Last ring from which we have correctly read an event. Could be `-1` if there were no successful reads. */ + unsigned long last_event_size; /* Last event correctly read. Could be `0` if there were no successful reads. */ }; extern struct internal_state g_state; diff --git a/userspace/libscap/engine/modern_bpf/modern_bpf_public.h b/userspace/libscap/engine/modern_bpf/modern_bpf_public.h index 2ddeeb0e11..fd35d43528 100644 --- a/userspace/libscap/engine/modern_bpf/modern_bpf_public.h +++ b/userspace/libscap/engine/modern_bpf/modern_bpf_public.h @@ -16,6 +16,7 @@ limitations under the License. #include #define MODERN_BPF_ENGINE "modern_bpf" +#define DEFAULT_CPU_FOR_EACH_BUFFER 1 #ifdef __cplusplus extern "C" @@ -24,7 +25,9 @@ extern "C" struct scap_modern_bpf_engine_params { - unsigned long buffer_bytes_dim; ///< Dimension of a single per-CPU buffer in bytes. Please note: this buffer will be mapped twice in the process virtual memory, so pay attention to its size. + uint16_t cpus_for_each_buffer; ///< [EXPERIMENTAL] We will allocate a ring buffer every `cpus_for_each_buffer` CPUs. `0` is a special value and means a single ring buffer shared between all the CPUs. + bool allocate_online_only; ///< [EXPERIMENTAL] Allocate ring buffers only for online CPUs. The number of ring buffers allocated changes according to the `cpus_for_each_buffer` param. Please note: this buffer will be mapped twice both kernel and userspace-side, so pay attention to its size. + unsigned long buffer_bytes_dim; ///< Dimension of a ring buffer in bytes. The number of ring buffers allocated changes according to the `cpus_for_each_buffer` param. Please note: this buffer will be mapped twice both kernel and userspace-side, so pay attention to its size. }; #ifdef __cplusplus diff --git a/userspace/libscap/engine/modern_bpf/scap_modern_bpf.c b/userspace/libscap/engine/modern_bpf/scap_modern_bpf.c index f5de7eb63d..9f7ab6d93f 100644 --- a/userspace/libscap/engine/modern_bpf/scap_modern_bpf.c +++ b/userspace/libscap/engine/modern_bpf/scap_modern_bpf.c @@ -128,9 +128,13 @@ static void scap_modern_bpf__free_engine(struct scap_engine_handle engine) free(engine.m_handle); } -static int32_t scap_modern_bpf__next(struct scap_engine_handle engine, OUT scap_evt** pevent, OUT uint16_t* pcpuid) +/* The third parameter is not the CPU number from which we extract the event but the ring buffer number. + * For the old BPF probe and the kernel module the number of CPUs is equal to the number of buffers since we always use a per-CPU approach. + */ +static int32_t scap_modern_bpf__next(struct scap_engine_handle engine, OUT scap_evt** pevent, OUT uint16_t* buffer_id) { - pman_consume_first_from_buffers((void**)pevent, pcpuid); + pman_consume_first_event((void**)pevent, buffer_id); + if((*pevent) == NULL) { /* The first time we sleep 500 us, if we have consecutive timeouts we can reach also 30 ms. */ @@ -210,10 +214,12 @@ int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs) struct scap_modern_bpf_engine_params* params = oargs->engine_params; bool libbpf_verbosity = false; + pman_clear_state(); + /* Some checks to test if we can use the modern BPF probe * - check the ring-buffer dimension in bytes. * - check the minimum required kernel version. - * + * * Please note the presence of BTF is directly checked by `libbpf` see `bpf_object__load_vmlinux_btf` method. 
*/ if(check_buffer_bytes_dim(handle->m_lasterr, params->buffer_bytes_dim) != SCAP_SUCCESS) @@ -226,8 +232,11 @@ int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs) return SCAP_FAILURE; } - /* Initialize the libpman internal state */ - if(pman_init_state(libbpf_verbosity, params->buffer_bytes_dim)) + /* Initialize the libpman internal state. + * Validation of `cpus_for_each_buffer` is made inside libpman + * since this is the unique place where we have the number of CPUs + */ + if(pman_init_state(libbpf_verbosity, params->buffer_bytes_dim, params->cpus_for_each_buffer, params->allocate_online_only)) { snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "unable to configure the libpman state."); return SCAP_FAILURE; @@ -236,9 +245,6 @@ int32_t scap_modern_bpf__init(scap_t* handle, scap_open_args* oargs) /* Set an initial sleep time in case of timeouts. */ engine.m_handle->m_retry_us = BUFFER_EMPTY_WAIT_TIME_US_START; - /* Return the number of system available CPUs, not online CPUs. */ - engine.m_handle->m_num_cpus = pman_get_cpus_number(); - /* Load and attach */ ret = pman_open_probe(); ret = ret ?: pman_prepare_ringbuf_array_before_loading(); @@ -278,7 +284,7 @@ int32_t scap_modern_bpf__close(struct scap_engine_handle engine) static uint32_t scap_modern_bpf__get_n_devs(struct scap_engine_handle engine) { - return engine.m_handle->m_num_cpus; + return pman_get_required_buffers(); } int32_t scap_modern_bpf__get_stats(struct scap_engine_handle engine, OUT scap_stats* stats) diff --git a/userspace/libscap/engine/modern_bpf/scap_modern_bpf.h b/userspace/libscap/engine/modern_bpf/scap_modern_bpf.h index 3cd9cc2b7a..458aec5002 100644 --- a/userspace/libscap/engine/modern_bpf/scap_modern_bpf.h +++ b/userspace/libscap/engine/modern_bpf/scap_modern_bpf.h @@ -26,8 +26,7 @@ struct scap; struct modern_bpf_engine { - size_t m_num_cpus; - unsigned long m_retry_us; - char* m_lasterr; - interesting_tp_set open_tp_set; + unsigned long m_retry_us; /* Microseconds to wait if all ring buffers are empty */ + char* m_lasterr; /* Last error caught by the engine */ + interesting_tp_set open_tp_set; /* Interesting tracepoints */ }; From e50d37cb15cd40068624d8d8e18c010867992f17 Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 10:38:04 +0100 Subject: [PATCH 3/6] update: propagate support to scap-open Signed-off-by: Andrea Terzolo --- .../libscap/examples/01-open/scap_open.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/userspace/libscap/examples/01-open/scap_open.c b/userspace/libscap/examples/01-open/scap_open.c index f168cfdc22..6cfa998916 100644 --- a/userspace/libscap/examples/01-open/scap_open.c +++ b/userspace/libscap/examples/01-open/scap_open.c @@ -39,6 +39,8 @@ limitations under the License. #define EVENT_TYPE_OPTION "--evt_type" #define BUFFER_OPTION "--buffer_dim" #define SIMPLE_SET_OPTION "--simple_set" +#define CPUS_FOR_EACH_BUFFER_MODE "--cpus_for_buf" +#define ALLOCATE_ONLINE_ONLY_MODE "--online_only" /* PRINT */ #define VALIDATION_OPTION "--validate_syscalls" @@ -709,6 +711,7 @@ void print_help() printf("'%s ': number of events to catch before terminating. (default: UINT64_MAX)\n", NUM_EVENTS_OPTION); printf("'%s ': every event of this type will be printed to console. 
(default: -1, no print)\n", EVENT_TYPE_OPTION); printf("'%s ': dimension in bytes of a single per CPU buffer.\n", BUFFER_OPTION); + printf("'%s ': allocate a ring buffer for every `cpus_for_each_buffer` CPUs.\n", CPUS_FOR_EACH_BUFFER_MODE); printf("\n------> VALIDATION OPTIONS\n"); printf("'%s': validation checks.\n", VALIDATION_OPTION); printf("\n------> PRINT OPTIONS\n"); @@ -731,7 +734,8 @@ void print_scap_source() } else if(strcmp(oargs.engine_name, MODERN_BPF_ENGINE) == 0) { - printf("* Modern BPF probe.\n"); + struct scap_modern_bpf_engine_params* params = oargs.engine_params; + printf("* Modern BPF probe, 1 ring buffer every %d CPUs\n", params->cpus_for_each_buffer); } else if(strcmp(oargs.engine_name, SAVEFILE_ENGINE) == 0) { @@ -815,6 +819,8 @@ void parse_CLI_options(int argc, char** argv) oargs.engine_name = MODERN_BPF_ENGINE; oargs.mode = SCAP_MODE_LIVE; modern_bpf_params.buffer_bytes_dim = buffer_bytes_dim; + modern_bpf_params.cpus_for_each_buffer = DEFAULT_CPU_FOR_EACH_BUFFER; + modern_bpf_params.allocate_online_only = false; oargs.engine_params = &modern_bpf_params; } if(!strcmp(argv[i], SCAP_FILE_OPTION)) @@ -888,6 +894,22 @@ void parse_CLI_options(int argc, char** argv) { enable_simple_set(); } + /* This should be used only with the modern probe */ + if(!strcmp(argv[i], CPUS_FOR_EACH_BUFFER_MODE)) + { + if(!(i + 1 < argc)) + { + printf("\nYou need to specify also the number of CPUs. Bye!\n"); + exit(EXIT_FAILURE); + } + modern_bpf_params.cpus_for_each_buffer = atoi(argv[++i]); + } + /* This should be used only with the modern probe */ + if(!strcmp(argv[i], ALLOCATE_ONLINE_ONLY_MODE)) + { + modern_bpf_params.allocate_online_only = true; + } + /*=============================== CONFIGURATIONS ===========================*/ From 43b33a9c1eaa3b12c9462bbe666e2ae24a671e2f Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 10:38:43 +0100 Subject: [PATCH 4/6] update: propagate support to sinsp Signed-off-by: Andrea Terzolo --- userspace/libsinsp/examples/test.cpp | 2 +- userspace/libsinsp/sinsp.cpp | 4 +++- userspace/libsinsp/sinsp.h | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/userspace/libsinsp/examples/test.cpp b/userspace/libsinsp/examples/test.cpp index 92ef660d0c..15057f86e2 100644 --- a/userspace/libsinsp/examples/test.cpp +++ b/userspace/libsinsp/examples/test.cpp @@ -194,7 +194,7 @@ void open_engine(sinsp& inspector) } else if(!engine_string.compare(MODERN_BPF_ENGINE)) { - inspector.open_modern_bpf(buffer_bytes_dim, ppm_sc, tp_set); + inspector.open_modern_bpf(buffer_bytes_dim, DEFAULT_CPU_FOR_EACH_BUFFER, true, ppm_sc, tp_set); } else { diff --git a/userspace/libsinsp/sinsp.cpp b/userspace/libsinsp/sinsp.cpp index 70986307f0..1532c668f3 100644 --- a/userspace/libsinsp/sinsp.cpp +++ b/userspace/libsinsp/sinsp.cpp @@ -606,7 +606,7 @@ void sinsp::open_gvisor(const std::string& config_path, const std::string& root_ set_get_procs_cpu_from_driver(false); } -void sinsp::open_modern_bpf(unsigned long driver_buffer_bytes_dim, const std::unordered_set &ppm_sc_of_interest, const std::unordered_set &tp_of_interest) +void sinsp::open_modern_bpf(unsigned long driver_buffer_bytes_dim, uint16_t cpus_for_each_buffer, bool online_only, const std::unordered_set &ppm_sc_of_interest, const std::unordered_set &tp_of_interest) { scap_open_args oargs = factory_open_args(MODERN_BPF_ENGINE, SCAP_MODE_LIVE); @@ -617,6 +617,8 @@ void sinsp::open_modern_bpf(unsigned long driver_buffer_bytes_dim, const std::un /* Engine-specific args. 
*/ struct scap_modern_bpf_engine_params params; params.buffer_bytes_dim = driver_buffer_bytes_dim; + params.cpus_for_each_buffer = cpus_for_each_buffer; + params.allocate_online_only = online_only; oargs.engine_params = ¶ms; open_common(&oargs); } diff --git a/userspace/libsinsp/sinsp.h b/userspace/libsinsp/sinsp.h index 0fb5619b5a..83c7e02d67 100644 --- a/userspace/libsinsp/sinsp.h +++ b/userspace/libsinsp/sinsp.h @@ -223,7 +223,11 @@ class SINSP_PUBLIC sinsp : public capture_stats_source virtual void open_savefile(const std::string &filename, int fd = 0); virtual void open_plugin(const std::string &plugin_name, const std::string &plugin_open_params); virtual void open_gvisor(const std::string &config_path, const std::string &root_path); - virtual void open_modern_bpf(unsigned long driver_buffer_bytes_dim = DEFAULT_DRIVER_BUFFER_BYTES_DIM, const std::unordered_set &ppm_sc_of_interest = {}, const std::unordered_set &tp_of_interest = {}); + /*[EXPERIMENTAL] This API could change between releases, we are trying to find the right configuration to deploy the modern bpf probe: + * `cpus_for_each_buffer` and `online_only` are the 2 experimental params. The first one allows associating more than one CPU to a single ring buffer. + * The last one allows allocating ring buffers only for online CPUs and not for all system-available CPUs. + */ + virtual void open_modern_bpf(unsigned long driver_buffer_bytes_dim = DEFAULT_DRIVER_BUFFER_BYTES_DIM, uint16_t cpus_for_each_buffer = DEFAULT_CPU_FOR_EACH_BUFFER, bool online_only = true, const std::unordered_set &ppm_sc_of_interest = {}, const std::unordered_set &tp_of_interest = {}); virtual void open_test_input(scap_test_input_data *data); scap_open_args factory_open_args(const char* engine_name, scap_mode_t scap_mode); From e7d5ef9ccf556dbb9a5ecd9883a5f4d8bbe89a22 Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 11:17:53 +0100 Subject: [PATCH 5/6] tests: add new test suite for the modern probe Signed-off-by: Andrea Terzolo --- userspace/libscap/test/CMakeLists.txt | 6 + userspace/libscap/test/README.md | 20 + .../engines/modern_bpf/modern_bpf.cpp | 386 ++++++++++++++++++ 3 files changed, 412 insertions(+) create mode 100644 userspace/libscap/test/README.md create mode 100644 userspace/libscap/test/test_suites/engines/modern_bpf/modern_bpf.cpp diff --git a/userspace/libscap/test/CMakeLists.txt b/userspace/libscap/test/CMakeLists.txt index 33edceff0e..1b45528a9c 100644 --- a/userspace/libscap/test/CMakeLists.txt +++ b/userspace/libscap/test/CMakeLists.txt @@ -22,6 +22,12 @@ set(LIBSCAP_UNIT_TESTS_SOURCES scap_event.ut.cpp ) +# Modern BPF is supported only on kernel versions >= 5.8. +# To compile these tests you need to use the Cmake option `BUILD_LIBSCAP_MODERN_BPF=On` +if(BUILD_LIBSCAP_MODERN_BPF) + list(APPEND LIBSCAP_UNIT_TESTS_SOURCES ./test_suites/engines/modern_bpf/modern_bpf.cpp) +endif() + if (BUILD_LIBSCAP_GVISOR) list(APPEND LIBSCAP_UNIT_TESTS_SOURCES scap_gvisor_parsers.ut.cpp) include_directories(../engine/gvisor) diff --git a/userspace/libscap/test/README.md b/userspace/libscap/test/README.md new file mode 100644 index 0000000000..e31b737fae --- /dev/null +++ b/userspace/libscap/test/README.md @@ -0,0 +1,20 @@ +# Scap tests + +## Compile tests + +```bash +cmake -DUSE_BUNDLED_DEPS=On -DBUILD_BPF=True -DCREATE_TEST_TARGETS=On -DBUILD_LIBSCAP_GVISOR=Off .. 
+make unit-test-libscap +``` + +You can add tests for specific engines using their Cmake options: +- `-DBUILD_LIBSCAP_MODERN_BPF=On` +- `-BUILD_LIBSCAP_GVISOR=On` + +## Run tests + +From the build directory: + +```bash +sudo ./libscap/test/unit-test-libscap +``` \ No newline at end of file diff --git a/userspace/libscap/test/test_suites/engines/modern_bpf/modern_bpf.cpp b/userspace/libscap/test/test_suites/engines/modern_bpf/modern_bpf.cpp new file mode 100644 index 0000000000..5683bc4f88 --- /dev/null +++ b/userspace/libscap/test/test_suites/engines/modern_bpf/modern_bpf.cpp @@ -0,0 +1,386 @@ +#include "scap.h" +#include +#include +#include + +/* We are supposing that if we overcome this threshold, all buffers are full */ +#define MAX_ITERATIONS 300 + +scap_t* open_modern_bpf_engine(char* error_buf, int32_t* rc, unsigned long buffer_dim, uint16_t cpus_for_each_buffer, bool online_only, std::unordered_set tp_set = {}, std::unordered_set ppm_sc_set = {}) +{ + struct scap_open_args oargs = { + .engine_name = MODERN_BPF_ENGINE, + .mode = SCAP_MODE_LIVE, + }; + + /* If empty we fill with all tracepoints */ + if(tp_set.empty()) + { + for(int i = 0; i < TP_VAL_MAX; i++) + { + oargs.tp_of_interest.tp[i] = 1; + } + } + else + { + for(auto tp : tp_set) + { + oargs.tp_of_interest.tp[tp] = 1; + } + } + + /* If empty we fill with all syscalls */ + if(ppm_sc_set.empty()) + { + for(int i = 0; i < PPM_SC_MAX; i++) + { + oargs.ppm_sc_of_interest.ppm_sc[i] = 1; + } + } + else + { + for(auto ppm_sc : ppm_sc_set) + { + oargs.ppm_sc_of_interest.ppm_sc[ppm_sc] = 1; + } + } + + struct scap_modern_bpf_engine_params modern_bpf_params = { + .cpus_for_each_buffer = cpus_for_each_buffer, + .allocate_online_only = online_only, + .buffer_bytes_dim = buffer_dim, + }; + oargs.engine_params = &modern_bpf_params; + + return scap_open(&oargs, error_buf, rc); +} + +void check_event_is_not_overwritten(scap_t* h) +{ + /* Start the capture */ + ASSERT_EQ(scap_start_capture(h), SCAP_SUCCESS) << "unable to start the capture: " << scap_getlasterr(h) << std::endl; + + /* When the number of events is fixed for `MAX_ITERATIONS` we consider all the buffers full, this is just an approximation */ + scap_stats stats = {}; + uint64_t last_num_events = 0; + uint16_t iterations = 0; + + while(iterations < MAX_ITERATIONS || stats.n_drops == 0) + { + ASSERT_EQ(scap_get_stats(h, &stats), SCAP_SUCCESS) << "unable to get stats: " << scap_getlasterr(h) << std::endl; + if(last_num_events == (stats.n_evts - stats.n_drops)) + { + iterations++; + } + else + { + iterations = 0; + last_num_events = (stats.n_evts - stats.n_drops); + } + } + + /* Stop the capture */ + ASSERT_EQ(scap_stop_capture(h), SCAP_SUCCESS) << "unable to stop the capture: " << scap_getlasterr(h) << std::endl; + + /* The idea here is to check if an event is overwritten while we still have a pointer to it. + * Again this is only an approximation, we don't know if new events will be written in the buffer + * under test... + * + * We call `scap_next` keeping the pointer to the event. + * An event pointer becomes invalid when we call another `scap_next`, but until that moment it should be valid! + */ + scap_evt* evt = NULL; + uint16_t buffer_id; + + /* The first 'scap_next` could return a `SCAP_TIMEOUT` according to the chosen `buffer_mode` so we ignore it. 
*/ + scap_next(h, &evt, &buffer_id); + + ASSERT_EQ(scap_next(h, &evt, &buffer_id), SCAP_SUCCESS) << "unable to get an event with `scap_next`: " << scap_getlasterr(h) << std::endl; + + last_num_events = 0; + iterations = 0; + + /* We save some event info to check if they are still valid after some new events */ + uint64_t prev_ts = evt->ts; + uint64_t prev_tid = evt->tid; + uint32_t prev_len = evt->len; + uint16_t prev_type = evt->type; + uint32_t prev_nparams = evt->nparams; + + /* Start again the capture */ + ASSERT_EQ(scap_start_capture(h), SCAP_SUCCESS) << "unable to restart the capture: " << scap_getlasterr(h) << std::endl; + + /* We use the same approximation as before */ + while(iterations < MAX_ITERATIONS) + { + ASSERT_EQ(scap_get_stats(h, &stats), SCAP_SUCCESS) << "unable to get stats: " << scap_getlasterr(h) << std::endl; + if(last_num_events == (stats.n_evts - stats.n_drops)) + { + iterations++; + } + else + { + iterations = 0; + last_num_events = (stats.n_evts - stats.n_drops); + } + } + + /* We check if the previously collected event is still valid */ + ASSERT_EQ(prev_ts, evt->ts) << "different timestamp" << std::endl; + ASSERT_EQ(prev_tid, evt->tid) << "different thread id" << std::endl; + ASSERT_EQ(prev_len, evt->len) << "different event len" << std::endl; + ASSERT_EQ(prev_type, evt->type) << "different event type" << std::endl; + ASSERT_EQ(prev_nparams, evt->nparams) << "different num params" << std::endl; +} + +TEST(modern_bpf, open_engine) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* we want 1 ring buffer for each CPU */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, 1, true); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine: " << error_buffer << std::endl; + scap_close(h); +} + +TEST(modern_bpf, empty_buffer_dim) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 0, 1, true); + ASSERT_TRUE(!h || ret != SCAP_SUCCESS) << "the buffer dimension is 0, we should fail: " << error_buffer << std::endl; + /* In case of failure the `scap_close(h)` is already called in the vtable `init` method */ +} + +TEST(modern_bpf, wrong_buffer_dim) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* ring buffer dim is not a multiple of PAGE_SIZE */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 + 4 * 4096, 1, true); + ASSERT_TRUE(!h || ret != SCAP_SUCCESS) << "the buffer dimension is not a multiple of the page size, we should fail: " << error_buffer << std::endl; +} + +TEST(modern_bpf, not_enough_possible_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + + ssize_t num_possible_CPUs = sysconf(_SC_NPROCESSORS_CONF); + + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, num_possible_CPUs + 1, false); + ASSERT_TRUE(!h || ret != SCAP_SUCCESS) << "the CPUs required for each ring buffer are greater than the system possible CPUs, we should fail: " << error_buffer << std::endl; +} + +TEST(modern_bpf, not_enough_online_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + + ssize_t num_online_CPUs = sysconf(_SC_NPROCESSORS_ONLN); + + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, num_online_CPUs + 1, true); + ASSERT_TRUE(!h || ret != SCAP_SUCCESS) << "the CPUs required for each ring buffer are greater than the system online CPUs, we should fail: " << error_buffer << std::endl; +} + +TEST(modern_bpf, one_buffer_per_possible_CPU) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + 
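+	/* we want 1 ring buffer for every possible CPU (online and not), since `allocate_online_only` is false */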
scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, 1, false); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one ring buffer per CPU: " << error_buffer << std::endl; + + ssize_t num_possible_CPUs = sysconf(_SC_NPROCESSORS_CONF); + uint32_t num_expected_rings = scap_get_ndevs(h); + ASSERT_EQ(num_expected_rings, num_possible_CPUs) << "we should have a ring buffer for every possible CPU!" << std::endl; + + check_event_is_not_overwritten(h); + scap_close(h); +} + +TEST(modern_bpf, one_buffer_every_two_possible_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, 2, false); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one ring buffer every 2 CPUs: " << error_buffer << std::endl; + + ssize_t num_possible_CPUs = sysconf(_SC_NPROCESSORS_CONF); + uint32_t num_expected_rings = num_possible_CPUs / 2; + if(num_possible_CPUs % 2 != 0) + { + num_expected_rings++; + } + uint32_t num_rings = scap_get_ndevs(h); + ASSERT_EQ(num_rings, num_expected_rings) << "we should have one ring buffer every 2 CPUs!" << std::endl; + + check_event_is_not_overwritten(h); + scap_close(h); +} + +TEST(modern_bpf, one_buffer_shared_between_all_possible_CPUs_with_special_value) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* `0` is a special value that means one single shared ring buffer */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, 0, false); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one single shared ring buffer: " << error_buffer << std::endl; + + uint32_t num_rings = scap_get_ndevs(h); + ASSERT_EQ(num_rings, 1) << "we should have only one ring buffer shared between all CPUs!" << std::endl; + + check_event_is_not_overwritten(h); + scap_close(h); +} + +/* In this test we don't need to check for buffer corruption with `check_event_is_not_overwritten` + * we have already done it in the previous test `one_buffer_shared_between_all_CPUs_with_special_value`. + */ +TEST(modern_bpf, one_buffer_shared_between_all_online_CPUs_with_explicit_CPUs_number) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + + ssize_t num_possible_CPUs = sysconf(_SC_NPROCESSORS_ONLN); + + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 4 * 4096, num_possible_CPUs, true); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one single shared ring buffer: " << error_buffer << std::endl; + + uint32_t num_rings = scap_get_ndevs(h); + ASSERT_EQ(num_rings, 1) << "we should have only one ring buffer shared between all CPUs!" 
<< std::endl; + + scap_close(h); +} + +#if defined(__NR_close) && defined(__NR_openat) && defined(__NR_listen) && defined(__NR_accept4) && defined(__NR_getegid) && defined(__NR_getgid) && defined(__NR_geteuid) && defined(__NR_getuid) && defined(__NR_bind) && defined(__NR_connect) && defined(__NR_sendto) && defined(__NR_sendmsg) && defined(__NR_recvmsg) && defined(__NR_recvfrom) && defined(__NR_socket) && defined(__NR_socketpair) + +/* Number of events we want to assert */ +#define EVENTS_TO_ASSERT 32 + +void check_event_order(scap_t* h) +{ + uint32_t events_to_assert[EVENTS_TO_ASSERT] = {PPME_SYSCALL_CLOSE_E, PPME_SYSCALL_CLOSE_X, PPME_SYSCALL_OPENAT_2_E, PPME_SYSCALL_OPENAT_2_X, PPME_SOCKET_LISTEN_E, PPME_SOCKET_LISTEN_X, PPME_SOCKET_ACCEPT4_5_E, PPME_SOCKET_ACCEPT4_5_X, PPME_SYSCALL_GETEGID_E, PPME_SYSCALL_GETEGID_X, PPME_SYSCALL_GETGID_E, PPME_SYSCALL_GETGID_X, PPME_SYSCALL_GETEUID_E, PPME_SYSCALL_GETEUID_X, PPME_SYSCALL_GETUID_E, PPME_SYSCALL_GETUID_X, PPME_SOCKET_BIND_E, PPME_SOCKET_BIND_X, PPME_SOCKET_CONNECT_E, PPME_SOCKET_CONNECT_X, PPME_SOCKET_SENDTO_E, PPME_SOCKET_SENDTO_X, PPME_SOCKET_SENDMSG_E, PPME_SOCKET_SENDMSG_X, PPME_SOCKET_RECVMSG_E, PPME_SOCKET_RECVMSG_X, PPME_SOCKET_RECVFROM_E, PPME_SOCKET_RECVFROM_X, PPME_SOCKET_SOCKET_E, PPME_SOCKET_SOCKET_X, PPME_SOCKET_SOCKETPAIR_E, PPME_SOCKET_SOCKETPAIR_X}; + + /* Start the capture */ + ASSERT_EQ(scap_start_capture(h), SCAP_SUCCESS) << "unable to start the capture: " << scap_getlasterr(h) << std::endl; + + /* 1. Generate a `close` event pair */ + syscall(__NR_close, -1); + + /* 2. Generate an `openat` event pair */ + syscall(__NR_openat, 0, "/**mock_path**/", 0, 0); + + /* 3. Generate a `listen` event pair */ + syscall(__NR_listen, -1, -1); + + /* 4. Generate an `accept4` event pair */ + syscall(__NR_accept4, -1, NULL, NULL, 0); + + /* 5. Generate a `getegid` event pair */ + syscall(__NR_getegid); + + /* 6. Generate a `getgid` event pair */ + syscall(__NR_getgid); + + /* 7. Generate a `geteuid` event pair */ + syscall(__NR_geteuid); + + /* 8. Generate a `getuid` event pair */ + syscall(__NR_getuid); + + /* 9. Generate a `bind` event pair */ + syscall(__NR_bind, -1, NULL, 0); + + /* 10. Generate a `connect` event pair */ + syscall(__NR_connect, -1, NULL, 0); + + /* 11. Generate a `sendto` event pair */ + syscall(__NR_sendto, -1, NULL, 0, 0, NULL, 0); + + /* 12. Generate a `sendmsg` event pair */ + syscall(__NR_sendmsg, -1, NULL, 0); + + /* 13. Generate a `recvmsg` event pair */ + syscall(__NR_recvmsg, -1, NULL, 0); + + /* 14. Generate a `recvmsg` event pair */ + syscall(__NR_recvfrom, -1, NULL, 0, 0, NULL, 0); + + /* 15. Generate a `socket` event pair */ + syscall(__NR_socket, 0, 0, 0); + + /* 16. 
Generate a `socketpair` event pair */ + syscall(__NR_socketpair, 0, 0, 0, 0); + + /* Stop the capture */ + ASSERT_EQ(scap_stop_capture(h), SCAP_SUCCESS) << "unable to stop the capture: " << scap_getlasterr(h) << std::endl; + + scap_evt* evt = NULL; + uint16_t buffer_id = 0; + int ret = 0; + uint64_t acutal_pid = getpid(); + /* if we hit 5 consecutive timeouts it means that all buffers are empty (approximation) */ + uint16_t timeouts = 0; + + for(int i = 0; i < EVENTS_TO_ASSERT; i++) + { + while(true) + { + ret = scap_next(h, &evt, &buffer_id); + if(ret == SCAP_SUCCESS) + { + timeouts = 0; + if(evt->tid == acutal_pid && evt->type == events_to_assert[i]) + { + /* We found our event */ + break; + } + } + else if(ret == SCAP_TIMEOUT) + { + timeouts++; + if(timeouts == 5) + { + FAIL() << "we didn't find event '" << events_to_assert[i] << "' at position '" << i << "'" << std::endl; + } + } + } + } +} + +TEST(modern_bpf, read_in_order_one_buffer_per_online_CPU) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* We use buffers of 1 MB to be sure that we don't have drops */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 * 1024 * 1024, 1, true); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one ring buffer per CPU: " << error_buffer << std::endl; + + check_event_order(h); + scap_close(h); +} + +TEST(modern_bpf, read_in_order_one_buffer_every_two_online_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* We use buffers of 1 MB to be sure that we don't have drops */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 * 1024 * 1024, 2, true); + ASSERT_FALSE(!h || ret != SCAP_SUCCESS) << "unable to open modern bpf engine with one ring buffer every 2 CPUs: " << error_buffer << std::endl; + + check_event_order(h); + scap_close(h); +} + +TEST(modern_bpf, read_in_order_one_buffer_shared_between_all_possible_CPUs) +{ + char error_buffer[FILENAME_MAX] = {0}; + int ret = 0; + /* We use buffers of 1 MB to be sure that we don't have drops */ + scap_t* h = open_modern_bpf_engine(error_buffer, &ret, 1 * 1024 * 1024, 0, false); + ASSERT_EQ(!h || ret != SCAP_SUCCESS, false) << "unable to open modern bpf engine with one single shared ring buffer: " << error_buffer << std::endl; + + check_event_order(h); + scap_close(h); +} +#endif From c3a5c47706fd4546b2a7411f094b59c623f79682 Mon Sep 17 00:00:00 2001 From: Andrea Terzolo Date: Fri, 13 Jan 2023 12:39:36 +0100 Subject: [PATCH 6/6] update: set online_only as default in scap-open Signed-off-by: Andrea Terzolo Co-authored-by: Hendrik Brueckner --- userspace/libscap/examples/01-open/scap_open.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/userspace/libscap/examples/01-open/scap_open.c b/userspace/libscap/examples/01-open/scap_open.c index 6cfa998916..c4813e7a4c 100644 --- a/userspace/libscap/examples/01-open/scap_open.c +++ b/userspace/libscap/examples/01-open/scap_open.c @@ -40,7 +40,7 @@ limitations under the License. #define BUFFER_OPTION "--buffer_dim" #define SIMPLE_SET_OPTION "--simple_set" #define CPUS_FOR_EACH_BUFFER_MODE "--cpus_for_buf" -#define ALLOCATE_ONLINE_ONLY_MODE "--online_only" +#define ALL_AVAILABLE_CPUS_MODE "--available_cpus" /* PRINT */ #define VALIDATION_OPTION "--validate_syscalls" @@ -711,7 +711,9 @@ void print_help() printf("'%s ': number of events to catch before terminating. (default: UINT64_MAX)\n", NUM_EVENTS_OPTION); printf("'%s ': every event of this type will be printed to console. 
(default: -1, no print)\n", EVENT_TYPE_OPTION); printf("'%s ': dimension in bytes of a single per CPU buffer.\n", BUFFER_OPTION); + printf("[MODERN PROBE ONLY, EXPERIMENTAL]\n"); printf("'%s ': allocate a ring buffer for every `cpus_for_each_buffer` CPUs.\n", CPUS_FOR_EACH_BUFFER_MODE); + printf("'%s': allocate ring buffers for all available CPUs. Default: allocate ring buffers for online CPUs only.\n", ALL_AVAILABLE_CPUS_MODE); printf("\n------> VALIDATION OPTIONS\n"); printf("'%s': validation checks.\n", VALIDATION_OPTION); printf("\n------> PRINT OPTIONS\n"); @@ -820,7 +822,7 @@ void parse_CLI_options(int argc, char** argv) oargs.mode = SCAP_MODE_LIVE; modern_bpf_params.buffer_bytes_dim = buffer_bytes_dim; modern_bpf_params.cpus_for_each_buffer = DEFAULT_CPU_FOR_EACH_BUFFER; - modern_bpf_params.allocate_online_only = false; + modern_bpf_params.allocate_online_only = true; oargs.engine_params = &modern_bpf_params; } if(!strcmp(argv[i], SCAP_FILE_OPTION)) @@ -905,9 +907,9 @@ void parse_CLI_options(int argc, char** argv) modern_bpf_params.cpus_for_each_buffer = atoi(argv[++i]); } /* This should be used only with the modern probe */ - if(!strcmp(argv[i], ALLOCATE_ONLINE_ONLY_MODE)) + if(!strcmp(argv[i], ALL_AVAILABLE_CPUS_MODE)) { - modern_bpf_params.allocate_online_only = true; + modern_bpf_params.allocate_online_only = false; }
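
A minimal usage sketch (not part of the patch series) of the two knobs introduced here, `cpus_for_each_buffer` and `allocate_online_only`, modeled on the `scap_open()` calls in the new test suite. The 8 MiB buffer size, the error handling style, and the `main()` wrapper are illustrative assumptions, not values mandated by the patches.

```c
#include <stdio.h>
#include <stdint.h>
#include "scap.h"

int main(void)
{
	char error[SCAP_LASTERR_SIZE];
	int32_t rc = 0;

	struct scap_open_args oargs = {
		.engine_name = MODERN_BPF_ENGINE,
		.mode = SCAP_MODE_LIVE,
	};

	/* Capture every tracepoint and syscall, as the new test suite does. */
	for(int i = 0; i < TP_VAL_MAX; i++)
	{
		oargs.tp_of_interest.tp[i] = 1;
	}
	for(int i = 0; i < PPM_SC_MAX; i++)
	{
		oargs.ppm_sc_of_interest.ppm_sc[i] = 1;
	}

	struct scap_modern_bpf_engine_params params = {
		.cpus_for_each_buffer = 2,    /* one ring buffer every 2 CPUs */
		.allocate_online_only = true, /* consider only online CPUs */
		.buffer_bytes_dim = 8 * 1024 * 1024,
	};
	oargs.engine_params = &params;

	scap_t* h = scap_open(&oargs, error, &rc);
	if(h == NULL || rc != SCAP_SUCCESS)
	{
		fprintf(stderr, "unable to open the modern BPF engine: %s\n", error);
		return 1;
	}

	/* With these patches `scap_get_ndevs()` reports the number of allocated
	 * ring buffers, no longer the number of system CPUs.
	 */
	printf("allocated ring buffers: %u\n", scap_get_ndevs(h));

	scap_close(h);
	return 0;
}
```

With `--cpus_for_buf 2` the `scap-open` example drives the same configuration from the command line, and `--available_cpus` switches back to allocating buffers for all possible CPUs instead of only the online ones.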