From eb60ed819ce5e8b7e0daea9f1b385a783363066f Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Fri, 3 Oct 2025 15:58:56 -0500 Subject: [PATCH 01/18] update guest tracer to trace newer functions --- scripts/sosp24-experiments/vm_flows_exp.sh | 14 +- tracing/guest_loader.c | 124 +++++++++---- tracing/guest_tracer.bpf.c | 196 ++++++++++++++++++++- tracing/tracing_utils.h | 14 ++ utils/record-host-metrics.sh | 1 - 5 files changed, 312 insertions(+), 37 deletions(-) diff --git a/scripts/sosp24-experiments/vm_flows_exp.sh b/scripts/sosp24-experiments/vm_flows_exp.sh index 7c32db24b..80ceb43df 100755 --- a/scripts/sosp24-experiments/vm_flows_exp.sh +++ b/scripts/sosp24-experiments/vm_flows_exp.sh @@ -88,17 +88,21 @@ sleep 1 client_cores="32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63" server_cores="0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31" -num_cores=20 -client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) -server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) +# num_cores=20 +# client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) +# server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) timestamp=$(date '+%Y-%m-%d-%H-%M-%S') # 5 10 20 40 for socket_buf in 1; do for ring_buffer in 512; do # 5 10 20 40 - for i in 20; do - format_i=$(printf "%02d\n" $i) + for i in 1 4 8 20; do + num_cores=$i + client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) + server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) + + format_i=$(printf "%02d\n" $i) exp_name="${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-${ring_buffer}_sokcetbuf${socket_buf}_${num_cores}cores" echo $exp_name diff --git a/tracing/guest_loader.c b/tracing/guest_loader.c index aa54c36ed..7bb5da1e0 100644 --- a/tracing/guest_loader.c +++ b/tracing/guest_loader.c @@ -90,41 +90,73 @@ typedef struct const char *target_name; probe_type_t type; enum FunctionName cookie; + const char *module_name; } probe_def_t; probe_def_t probes_to_attach[] = { - {"kprobe_iommu_map", "iommu_map", PROBE_TYPE_KPROBE, IOMMU_MAP}, - {"kretprobe_iommu_map", "iommu_map", PROBE_TYPE_KRETPROBE, IOMMU_MAP}, - {"kprobe___iommu_map", "__iommu_map", PROBE_TYPE_KPROBE, IOMMU_MAP_INTERNAL}, - {"kretprobe___iommu_map", "__iommu_map", PROBE_TYPE_KRETPROBE, IOMMU_MAP_INTERNAL}, - {"kprobe_intel_iommu_iotlb_sync_map", "intel_iommu_iotlb_sync_map", PROBE_TYPE_KPROBE, IOMMU_IOTLB_SYNC_MAP}, - {"kretprobe_intel_iommu_iotlb_sync_map", "intel_iommu_iotlb_sync_map", PROBE_TYPE_KRETPROBE, IOMMU_IOTLB_SYNC_MAP}, + {"kprobe_iommu_map", "iommu_map", PROBE_TYPE_KPROBE, IOMMU_MAP,NULL}, + {"kretprobe_iommu_map", "iommu_map", PROBE_TYPE_KRETPROBE, IOMMU_MAP,NULL}, + {"kprobe___iommu_map", "__iommu_map", PROBE_TYPE_KPROBE, IOMMU_MAP_INTERNAL,NULL}, + {"kretprobe___iommu_map", "__iommu_map", PROBE_TYPE_KRETPROBE, IOMMU_MAP_INTERNAL,NULL}, + {"kprobe_intel_iommu_iotlb_sync_map", "intel_iommu_iotlb_sync_map", PROBE_TYPE_KPROBE, IOMMU_IOTLB_SYNC_MAP,NULL}, + {"kretprobe_intel_iommu_iotlb_sync_map", "intel_iommu_iotlb_sync_map", PROBE_TYPE_KRETPROBE, IOMMU_IOTLB_SYNC_MAP,NULL}, //cache_tag_flush_range_np - {"kprobe_cache_tag_flush_range_np", "cache_tag_flush_range_np", PROBE_TYPE_KPROBE, CACHE_TAG_FLUSH_RANGE_NP}, - {"kretprobe_cache_tag_flush_range_np", 
"cache_tag_flush_range_np", PROBE_TYPE_KRETPROBE, CACHE_TAG_FLUSH_RANGE_NP}, + {"kprobe_cache_tag_flush_range_np", "cache_tag_flush_range_np", PROBE_TYPE_KPROBE, CACHE_TAG_FLUSH_RANGE_NP,NULL}, + {"kretprobe_cache_tag_flush_range_np", "cache_tag_flush_range_np", PROBE_TYPE_KRETPROBE, CACHE_TAG_FLUSH_RANGE_NP,NULL}, // iommu_flush_write_buffer - {"kprobe_iommu_flush_write_buffer", "iommu_flush_write_buffer", PROBE_TYPE_KPROBE, IOMMU_FLUSH_WRITE_BUFFER}, - {"kretprobe_iommu_flush_write_buffer", "iommu_flush_write_buffer", PROBE_TYPE_KRETPROBE, IOMMU_FLUSH_WRITE_BUFFER}, - {"kprobe_iommu_unmap", "iommu_unmap", PROBE_TYPE_KPROBE, IOMMU_UNMAP}, - {"kretprobe_iommu_unmap", "iommu_unmap", PROBE_TYPE_KRETPROBE, IOMMU_UNMAP}, - {"kprobe___iommu_unmap", "__iommu_unmap", PROBE_TYPE_KPROBE, IOMMU_UNMAP_INTERNAL}, - {"kretprobe___iommu_unmap", "__iommu_unmap", PROBE_TYPE_KRETPROBE, IOMMU_UNMAP_INTERNAL}, - {"kprobe_intel_iommu_tlb_sync", "intel_iommu_tlb_sync", PROBE_TYPE_KPROBE, IOMMU_TLB_SYNC}, - {"kretprobe_intel_iommu_tlb_sync", "intel_iommu_tlb_sync", PROBE_TYPE_KRETPROBE, IOMMU_TLB_SYNC}, + {"kprobe_iommu_flush_write_buffer", "iommu_flush_write_buffer", PROBE_TYPE_KPROBE, IOMMU_FLUSH_WRITE_BUFFER,NULL}, + {"kretprobe_iommu_flush_write_buffer", "iommu_flush_write_buffer", PROBE_TYPE_KRETPROBE, IOMMU_FLUSH_WRITE_BUFFER,NULL}, + {"kprobe_iommu_unmap", "iommu_unmap", PROBE_TYPE_KPROBE, IOMMU_UNMAP,NULL}, + {"kretprobe_iommu_unmap", "iommu_unmap", PROBE_TYPE_KRETPROBE, IOMMU_UNMAP,NULL}, + {"kprobe___iommu_unmap", "__iommu_unmap", PROBE_TYPE_KPROBE, IOMMU_UNMAP_INTERNAL,NULL}, + {"kretprobe___iommu_unmap", "__iommu_unmap", PROBE_TYPE_KRETPROBE, IOMMU_UNMAP_INTERNAL,NULL}, + {"kprobe_intel_iommu_tlb_sync", "intel_iommu_tlb_sync", PROBE_TYPE_KPROBE, IOMMU_TLB_SYNC,NULL}, + {"kretprobe_intel_iommu_tlb_sync", "intel_iommu_tlb_sync", PROBE_TYPE_KRETPROBE, IOMMU_TLB_SYNC,NULL}, // cache_tag_flush_range - {"kprobe_cache_tag_flush_range", "cache_tag_flush_range", PROBE_TYPE_KPROBE, CACHE_TAG_FLUSH_RANGE}, - {"kretprobe_cache_tag_flush_range", "cache_tag_flush_range", PROBE_TYPE_KRETPROBE, CACHE_TAG_FLUSH_RANGE}, - {"kprobe_page_pool_alloc_netmem", "page_pool_alloc_netmem", PROBE_TYPE_KPROBE, PAGE_POOL_ALLOC}, - {"kretprobe_page_pool_alloc_netmem", "page_pool_alloc_netmem", PROBE_TYPE_KRETPROBE, PAGE_POOL_ALLOC}, - {"kprobe___page_pool_alloc_pages_slow", "__page_pool_alloc_pages_slow", PROBE_TYPE_KPROBE, PAGE_POOL_SLOW}, - {"kretprobe___page_pool_alloc_pages_slow", "__page_pool_alloc_pages_slow", PROBE_TYPE_KRETPROBE, PAGE_POOL_SLOW}, - {"kprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KPROBE, QI_SUBMIT_SYNC }, - {"kretprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KRETPROBE, QI_SUBMIT_SYNC}, + {"kprobe_cache_tag_flush_range", "cache_tag_flush_range", PROBE_TYPE_KPROBE, CACHE_TAG_FLUSH_RANGE,NULL}, + {"kretprobe_cache_tag_flush_range", "cache_tag_flush_range", PROBE_TYPE_KRETPROBE, CACHE_TAG_FLUSH_RANGE,NULL}, + {"kprobe_page_pool_alloc_netmem", "page_pool_alloc_netmem", PROBE_TYPE_KPROBE, PAGE_POOL_ALLOC,NULL}, + {"kretprobe_page_pool_alloc_netmem", "page_pool_alloc_netmem", PROBE_TYPE_KRETPROBE, PAGE_POOL_ALLOC,NULL}, + {"kprobe___page_pool_alloc_pages_slow", "__page_pool_alloc_pages_slow", PROBE_TYPE_KPROBE, PAGE_POOL_SLOW,NULL}, + {"kretprobe___page_pool_alloc_pages_slow", "__page_pool_alloc_pages_slow", PROBE_TYPE_KRETPROBE, PAGE_POOL_SLOW,NULL}, // qi_batch_flush_descs - {"kprobe_qi_batch_flush_descs", "qi_batch_flush_descs", PROBE_TYPE_KPROBE, QI_BATCH_FLUSH_DESCS}, - 
{"kretprobe_qi_batch_flush_descs", "qi_batch_flush_descs", PROBE_TYPE_KRETPROBE, QI_BATCH_FLUSH_DESCS}, - {"kprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KPROBE, QI_SUBMIT_SYNC }, - {"kretprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KRETPROBE, QI_SUBMIT_SYNC}, + {"kprobe_qi_batch_flush_descs", "qi_batch_flush_descs", PROBE_TYPE_KPROBE, QI_BATCH_FLUSH_DESCS,NULL}, + {"kretprobe_qi_batch_flush_descs", "qi_batch_flush_descs", PROBE_TYPE_KRETPROBE, QI_BATCH_FLUSH_DESCS,NULL}, + {"kprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KPROBE, QI_SUBMIT_SYNC,NULL}, + {"kretprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KRETPROBE, QI_SUBMIT_SYNC,NULL}, + {"kprobe_page_pool_dma_map", "page_pool_dma_map", PROBE_TYPE_KPROBE, PAGE_POOL_DMA_MAP,NULL}, + {"kretprobe_page_pool_dma_map", "page_pool_dma_map", PROBE_TYPE_KRETPROBE, PAGE_POOL_DMA_MAP,NULL}, + {"kprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_dma_push_build_single_hook", "trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_dma_push_build_single_hook", "trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_dma_push_xmit_single_hook", "trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_dma_push_xmit_single_hook", "trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_dma_push_page_hook", "trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_dma_push_page_hook", "trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_tx_dma_unmap_hook", "trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_tx_dma_unmap_hook", "trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, + {"kprobe_trace_qi_submit_sync_cs", "trace_qi_submit_sync_cs", PROBE_TYPE_KPROBE, TRACE_QI_SUBMIT_SYNC_CS,NULL}, + {"kretprobe_trace_qi_submit_sync_cs", "trace_qi_submit_sync_cs", PROBE_TYPE_KRETPROBE, TRACE_QI_SUBMIT_SYNC_CS,NULL}, + {"kprobe_trace_qi_submit_sync_lock_wrapper", "trace_qi_submit_sync_lock_wrapper", PROBE_TYPE_KPROBE, TRACE_QI_SUBMIT_SYNC_LOCK_WRAPPER,NULL}, + {"kretprobe_trace_qi_submit_sync_lock_wrapper", "trace_qi_submit_sync_lock_wrapper", PROBE_TYPE_KRETPROBE, TRACE_QI_SUBMIT_SYNC_LOCK_WRAPPER,NULL}, + {"kprobe_trace_iommu_flush_write_buffer_cs", "trace_iommu_flush_write_buffer_cs", PROBE_TYPE_KPROBE, TRACE_IOMMU_FLUSH_WRITE_BUFFER_CS,NULL}, + {"kretprobe_trace_iommu_flush_write_buffer_cs", "trace_iommu_flush_write_buffer_cs", PROBE_TYPE_KRETPROBE, TRACE_IOMMU_FLUSH_WRITE_BUFFER_CS,NULL}, + {"kprobe_trace_iommu_flush_write_buffer_lock_wrapper", "trace_iommu_flush_write_buffer_lock_wrapper", PROBE_TYPE_KPROBE, TRACE_IOMMU_FLUSH_WRITE_BUFFER_LOCK_WRAPPER,NULL}, + {"kretprobe_trace_iommu_flush_write_buffer_lock_wrapper", "trace_iommu_flush_write_buffer_lock_wrapper", PROBE_TYPE_KRETPROBE, 
TRACE_IOMMU_FLUSH_WRITE_BUFFER_LOCK_WRAPPER,NULL}, + {"kprobe_page_pool_return_page", "page_pool_return_page", PROBE_TYPE_KPROBE, PAGE_POOL_RETURN_PAGE,NULL}, + {"kretprobe_page_pool_return_page", "page_pool_return_page", PROBE_TYPE_KRETPROBE, PAGE_POOL_RETURN_PAGE,NULL}, + {"kprobe_page_pool_put_unrefed_netmem", "page_pool_put_unrefed_netmem", PROBE_TYPE_KPROBE, PAGE_POOL_PUT_NETMEM,NULL}, + {"kretprobe_page_pool_put_unrefed_netmem", "page_pool_put_unrefed_netmem", PROBE_TYPE_KRETPROBE, PAGE_POOL_PUT_NETMEM,NULL}, + {"kprobe_page_pool_put_unrefed_page", "page_pool_put_unrefed_page", PROBE_TYPE_KPROBE, PAGE_POOL_PUT_PAGE,NULL}, + {"kretprobe_page_pool_put_unrefed_page", "page_pool_put_unrefed_page", PROBE_TYPE_KRETPROBE, PAGE_POOL_PUT_PAGE,NULL}, + // --- Additions for count functions --- + {"kprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, + {"kretprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KRETPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, + {"kprobe_count_page_pool_release_page_dma_hook", "count_page_pool_release_page_dma_hook", PROBE_TYPE_KPROBE, COUNT_PAGE_POOL_RELEASE, NULL}, + {"kretprobe_count_page_pool_release_page_dma_hook", "count_page_pool_release_page_dma_hook", PROBE_TYPE_KRETPROBE, COUNT_PAGE_POOL_RELEASE, NULL}, + {"kprobe_count_page_pool_recycle_in_cache_hook", "count_page_pool_recycle_in_cache_hook", PROBE_TYPE_KPROBE, COUNT_PAGE_POOL_RECYCLE, NULL}, + {"kretprobe_count_page_pool_recycle_in_cache_hook", "count_page_pool_recycle_in_cache_hook", PROBE_TYPE_KRETPROBE, COUNT_PAGE_POOL_RECYCLE, NULL}, }; const int num_probes_to_attach = sizeof(probes_to_attach) / sizeof(probes_to_attach[0]); struct bpf_link *attached_links[MAX_PROBES]; @@ -166,6 +198,38 @@ const char *func_name_to_string(enum FunctionName fn) return "qi_batch_flush_descs"; case QI_SUBMIT_SYNC: return "qi_submit_sync"; + case TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK: + return "trace_mlx5e_tx_dma_unmap_ktls_hook"; + case TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK: + return "trace_mlx5e_dma_push_build_single_hook"; + case TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK: + return "trace_mlx5e_dma_push_xmit_single_hook"; + case TRACE_MLX5E_DMA_PUSH_PAGE_HOOK: + return "trace_mlx5e_dma_push_page_hook"; + case TRACE_MLX5E_TX_DMA_UNMAP_HOOK: + return "trace_mlx5e_tx_dma_unmap_hook"; + case TRACE_QI_SUBMIT_SYNC_CS: + return "trace_qi_submit_sync_cs"; + case TRACE_QI_SUBMIT_SYNC_LOCK_WRAPPER: + return "trace_qi_submit_sync_lock_wrapper"; + case TRACE_IOMMU_FLUSH_WRITE_BUFFER_CS: + return "trace_iommu_flush_write_buffer_cs"; + case TRACE_IOMMU_FLUSH_WRITE_BUFFER_LOCK_WRAPPER: + return "trace_iommu_flush_write_buffer_lock_wrapper"; + case PAGE_POOL_DMA_MAP: + return "page_pool_dma_map"; + case PAGE_POOL_RETURN_PAGE: + return "page_pool_return_page"; + case COUNT_PAGE_POOL_RELEASE: + return "count_page_pool_release_page_dma_hook"; + case COUNT_PAGE_POOL_RECYCLE: + return "count_page_pool_recycle_in_cache_hook"; + case COUNT_MLX5E_RX_MPWQE_PER_PAGE: + return "count_mlx5e_alloc_rx_mpwqe_perpage_hook"; + case PAGE_POOL_PUT_NETMEM: + return "page_pool_put_unrefed_netmem"; + case PAGE_POOL_PUT_PAGE: + return "page_pool_put_unrefed_page"; default: return "UnknownFunction"; } @@ -381,12 +445,12 @@ int main(int argc, char **argv) struct bpf_link *link = NULL; if (p_def->type == PROBE_TYPE_KPROBE) { - LIBBPF_OPTS(bpf_kprobe_opts, k_opts, .bpf_cookie = p_def->cookie); + 
LIBBPF_OPTS(bpf_kprobe_opts, k_opts, .bpf_cookie = p_def->cookie,.module = p_def->module_name); link = bpf_program__attach_kprobe_opts(prog, p_def->target_name, &k_opts); } else if (p_def->type == PROBE_TYPE_KRETPROBE) { - LIBBPF_OPTS(bpf_kprobe_opts, kr_opts, .bpf_cookie = p_def->cookie, .retprobe = true); + LIBBPF_OPTS(bpf_kprobe_opts, kr_opts, .bpf_cookie = p_def->cookie, .retprobe = true,.module = p_def->module_name); link = bpf_program__attach_kprobe_opts(prog, p_def->target_name, &kr_opts); } diff --git a/tracing/guest_tracer.bpf.c b/tracing/guest_tracer.bpf.c index 28d8dab2a..6919d99ad 100644 --- a/tracing/guest_tracer.bpf.c +++ b/tracing/guest_tracer.bpf.c @@ -291,4 +291,198 @@ SEC("kretprobe/iommu_flush_write_buffer") int BPF_KRETPROBE(kretprobe_iommu_flush_write_buffer, void *ret) { return _bpf_utils_trace_func_exit(ctx, GUEST, false); -} \ No newline at end of file +} + +SEC("kprobe/page_pool_dma_map") +int BPF_KPROBE(kprobe_page_pool_dma_map, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/page_pool_dma_map") +int BPF_KRETPROBE(kretprobe_page_pool_dma_map, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/page_pool_return_page") +int BPF_KPROBE(kprobe_page_pool_return_page, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/page_pool_return_page") +int BPF_KRETPROBE(kretprobe_page_pool_return_page, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/page_pool_put_unrefed_netmem") +int BPF_KPROBE(kprobe_page_pool_put_unrefed_netmem, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/page_pool_put_unrefed_netmem") +int BPF_KRETPROBE(kretprobe_page_pool_put_unrefed_netmem, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/page_pool_put_unrefed_page") +int BPF_KPROBE(kprobe_page_pool_put_unrefed_page, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/page_pool_put_unrefed_page") +int BPF_KRETPROBE(kretprobe_page_pool_put_unrefed_page, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + + +SEC("kprobe/trace_mlx5e_tx_dma_unmap_ktls_hook") +int BPF_KPROBE(kprobe_trace_mlx5e_tx_dma_unmap_ktls_hook, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_mlx5e_tx_dma_unmap_ktls_hook") +int BPF_KRETPROBE(kretprobe_trace_mlx5e_tx_dma_unmap_ktls_hook, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/trace_mlx5e_dma_push_build_single_hook") +int BPF_KPROBE(kprobe_trace_mlx5e_dma_push_build_single_hook, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_mlx5e_dma_push_build_single_hook") +int BPF_KRETPROBE(kretprobe_trace_mlx5e_dma_push_build_single_hook, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/trace_mlx5e_dma_push_xmit_single_hook") +int BPF_KPROBE(kprobe_trace_mlx5e_dma_push_xmit_single_hook, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_mlx5e_dma_push_xmit_single_hook") +int BPF_KRETPROBE(kretprobe_trace_mlx5e_dma_push_xmit_single_hook, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/trace_mlx5e_dma_push_page_hook") +int BPF_KPROBE(kprobe_trace_mlx5e_dma_push_page_hook, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_mlx5e_dma_push_page_hook") +int 
BPF_KRETPROBE(kretprobe_trace_mlx5e_dma_push_page_hook, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/trace_mlx5e_tx_dma_unmap_hook") +int BPF_KPROBE(kprobe_trace_mlx5e_tx_dma_unmap_hook, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_mlx5e_tx_dma_unmap_hook") +int BPF_KRETPROBE(kretprobe_trace_mlx5e_tx_dma_unmap_hook, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/trace_qi_submit_sync_cs") +int BPF_KPROBE(kprobe_trace_qi_submit_sync_cs, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_qi_submit_sync_cs") +int BPF_KRETPROBE(kretprobe_trace_qi_submit_sync_cs, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/trace_qi_submit_sync_lock_wrapper") +int BPF_KPROBE(kprobe_trace_qi_submit_sync_lock_wrapper, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_qi_submit_sync_lock_wrapper") +int BPF_KRETPROBE(kretprobe_trace_qi_submit_sync_lock_wrapper, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/trace_iommu_flush_write_buffer_cs") +int BPF_KPROBE(kprobe_trace_iommu_flush_write_buffer_cs, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_iommu_flush_write_buffer_cs") +int BPF_KRETPROBE(kretprobe_trace_iommu_flush_write_buffer_cs, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/trace_iommu_flush_write_buffer_lock_wrapper") +int BPF_KPROBE(kprobe_trace_iommu_flush_write_buffer_lock_wrapper, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/trace_iommu_flush_write_buffer_lock_wrapper") +int BPF_KRETPROBE(kretprobe_trace_iommu_flush_write_buffer_lock_wrapper, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/count_mlx5e_alloc_rx_mpwqe_perpage_hook") +int BPF_KPROBE(kprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/count_mlx5e_alloc_rx_mpwqe_perpage_hook") +int BPF_KRETPROBE(kretprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/count_page_pool_release_page_dma_hook") +int BPF_KPROBE(kprobe_count_page_pool_release_page_dma_hook, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/count_page_pool_release_page_dma_hook") +int BPF_KRETPROBE(kretprobe_count_page_pool_release_page_dma_hook, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/count_page_pool_recycle_in_cache_hook") +int BPF_KPROBE(kprobe_count_page_pool_recycle_in_cache_hook, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/count_page_pool_recycle_in_cache_hook") +int BPF_KRETPROBE(kretprobe_count_page_pool_recycle_in_cache_hook, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + diff --git a/tracing/tracing_utils.h b/tracing/tracing_utils.h index 08500243d..e4a7c622d 100644 --- a/tracing/tracing_utils.h +++ b/tracing/tracing_utils.h @@ -38,6 +38,19 @@ enum FunctionName PAGE_POOL_SLOW, QI_SUBMIT_SYNC, QI_BATCH_FLUSH_DESCS, + TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK, + TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK, + TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK, + TRACE_MLX5E_DMA_PUSH_PAGE_HOOK, + TRACE_MLX5E_TX_DMA_UNMAP_HOOK, + TRACE_QI_SUBMIT_SYNC_CS, + TRACE_QI_SUBMIT_SYNC_LOCK_WRAPPER, + 
TRACE_IOMMU_FLUSH_WRITE_BUFFER_CS, + TRACE_IOMMU_FLUSH_WRITE_BUFFER_LOCK_WRAPPER, + PAGE_POOL_DMA_MAP, + PAGE_POOL_RETURN_PAGE, + PAGE_POOL_PUT_NETMEM, + PAGE_POOL_PUT_PAGE, QEMU_VTD_FETCH_INV_DESC, TRACE_FUNCS_END, // Marks the end of trace functions @@ -45,6 +58,7 @@ enum FunctionName COUNT_FUNC_START = TRACE_FUNCS_END, COUNT_PAGE_POOL_RELEASE = COUNT_FUNC_START, COUNT_PAGE_POOL_RECYCLE, + COUNT_MLX5E_RX_MPWQE_PER_PAGE, COUNT_FUNCS_END, FUNCTION_NAME_MAX = COUNT_FUNCS_END, diff --git a/utils/record-host-metrics.sh b/utils/record-host-metrics.sh index 7ec7861f7..4facde752 100644 --- a/utils/record-host-metrics.sh +++ b/utils/record-host-metrics.sh @@ -1,5 +1,4 @@ #default values -set -x SCRIPT_NAME="record-host-metrics" DEP_DIR="/home/schai" From 95994e488346812fe1b15a7205ec79888627a195 Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Fri, 3 Oct 2025 16:06:04 -0500 Subject: [PATCH 02/18] Remove all probes for mlx_core module --- tracing/guest_loader.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tracing/guest_loader.c b/tracing/guest_loader.c index 7bb5da1e0..63216fa02 100644 --- a/tracing/guest_loader.c +++ b/tracing/guest_loader.c @@ -126,16 +126,16 @@ probe_def_t probes_to_attach[] = { {"kretprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KRETPROBE, QI_SUBMIT_SYNC,NULL}, {"kprobe_page_pool_dma_map", "page_pool_dma_map", PROBE_TYPE_KPROBE, PAGE_POOL_DMA_MAP,NULL}, {"kretprobe_page_pool_dma_map", "page_pool_dma_map", PROBE_TYPE_KRETPROBE, PAGE_POOL_DMA_MAP,NULL}, - {"kprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, - {"kretprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, - {"kprobe_trace_mlx5e_dma_push_build_single_hook", "trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, - {"kretprobe_trace_mlx5e_dma_push_build_single_hook", "trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, - {"kprobe_trace_mlx5e_dma_push_xmit_single_hook", "trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, - {"kretprobe_trace_mlx5e_dma_push_xmit_single_hook", "trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, - {"kprobe_trace_mlx5e_dma_push_page_hook", "trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, - {"kretprobe_trace_mlx5e_dma_push_page_hook", "trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, - {"kprobe_trace_mlx5e_tx_dma_unmap_hook", "trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, - {"kretprobe_trace_mlx5e_tx_dma_unmap_hook", "trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, + // {"kprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, + // {"kretprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, + // {"kprobe_trace_mlx5e_dma_push_build_single_hook", "trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KPROBE, 
TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, + // {"kretprobe_trace_mlx5e_dma_push_build_single_hook", "trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, + // {"kprobe_trace_mlx5e_dma_push_xmit_single_hook", "trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, + // {"kretprobe_trace_mlx5e_dma_push_xmit_single_hook", "trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, + // {"kprobe_trace_mlx5e_dma_push_page_hook", "trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, + // {"kretprobe_trace_mlx5e_dma_push_page_hook", "trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, + // {"kprobe_trace_mlx5e_tx_dma_unmap_hook", "trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, + // {"kretprobe_trace_mlx5e_tx_dma_unmap_hook", "trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, {"kprobe_trace_qi_submit_sync_cs", "trace_qi_submit_sync_cs", PROBE_TYPE_KPROBE, TRACE_QI_SUBMIT_SYNC_CS,NULL}, {"kretprobe_trace_qi_submit_sync_cs", "trace_qi_submit_sync_cs", PROBE_TYPE_KRETPROBE, TRACE_QI_SUBMIT_SYNC_CS,NULL}, {"kprobe_trace_qi_submit_sync_lock_wrapper", "trace_qi_submit_sync_lock_wrapper", PROBE_TYPE_KPROBE, TRACE_QI_SUBMIT_SYNC_LOCK_WRAPPER,NULL}, @@ -151,8 +151,8 @@ probe_def_t probes_to_attach[] = { {"kprobe_page_pool_put_unrefed_page", "page_pool_put_unrefed_page", PROBE_TYPE_KPROBE, PAGE_POOL_PUT_PAGE,NULL}, {"kretprobe_page_pool_put_unrefed_page", "page_pool_put_unrefed_page", PROBE_TYPE_KRETPROBE, PAGE_POOL_PUT_PAGE,NULL}, // --- Additions for count functions --- - {"kprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, - {"kretprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KRETPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, + // {"kprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, + // {"kretprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KRETPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, {"kprobe_count_page_pool_release_page_dma_hook", "count_page_pool_release_page_dma_hook", PROBE_TYPE_KPROBE, COUNT_PAGE_POOL_RELEASE, NULL}, {"kretprobe_count_page_pool_release_page_dma_hook", "count_page_pool_release_page_dma_hook", PROBE_TYPE_KRETPROBE, COUNT_PAGE_POOL_RELEASE, NULL}, {"kprobe_count_page_pool_recycle_in_cache_hook", "count_page_pool_recycle_in_cache_hook", PROBE_TYPE_KPROBE, COUNT_PAGE_POOL_RECYCLE, NULL}, @@ -445,12 +445,12 @@ int main(int argc, char **argv) struct bpf_link *link = NULL; if (p_def->type == PROBE_TYPE_KPROBE) { - LIBBPF_OPTS(bpf_kprobe_opts, k_opts, .bpf_cookie = p_def->cookie,.module = p_def->module_name); + LIBBPF_OPTS(bpf_kprobe_opts, k_opts, .bpf_cookie = p_def->cookie); link = bpf_program__attach_kprobe_opts(prog, p_def->target_name, &k_opts); } else if (p_def->type == PROBE_TYPE_KRETPROBE) { - LIBBPF_OPTS(bpf_kprobe_opts, kr_opts, .bpf_cookie = p_def->cookie, .retprobe = true,.module = p_def->module_name); + LIBBPF_OPTS(bpf_kprobe_opts, kr_opts, .bpf_cookie = p_def->cookie, .retprobe = 
true); link = bpf_program__attach_kprobe_opts(prog, p_def->target_name, &kr_opts); } From 13cfa84f66a8462836296347949a3408bb6dbf35 Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Fri, 3 Oct 2025 17:56:29 -0500 Subject: [PATCH 03/18] ebpf and no ebpf data --- utils/reports/ebpf.md | 122 +++++++++++++++++++++++++++++++++++++++ utils/reports/no_ebpf.md | 90 +++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 utils/reports/ebpf.md create mode 100644 utils/reports/no_ebpf.md diff --git a/utils/reports/ebpf.md b/utils/reports/ebpf.md new file mode 100644 index 000000000..4ca4b4fb5 --- /dev/null +++ b/utils/reports/ebpf.md @@ -0,0 +1,122 @@ +Usage: python script.py + +``` +------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores Run Metrics ------- +Throughput: 46.913 +CPU Util: 100.0 +Drop rate: 6.9598e-06 +Acks per page: 0.011157171172169762 +Per page stats: + IOTLB Miss: 1.0182934255241831 + IOTLB First Lookup: 9.992788985910089 + IOTLB All Lookups: 17.35123975836122 + IOTLB Inv: 0.07360253473450856 + IOMMU Mem Access: 1.9066415944407733 + PWT Occupancy: 221906394.188 +Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 171036 1139456425 6662.09 -4.24 0.005973 +1 __iommu_map -1 170998 185684984 1085.89 -0.69 0.005972 +2 intel_iommu_iotlb_sync_map -1 170922 686091943 4014.06 -3.05 0.005969 +3 cache_tag_flush_range_np -1 170866 498774037 2919.09 -2.03 0.005967 +4 iommu_flush_write_buffer -1 170811 118642694 694.58 -0.26 0.005965 +5 __iommu_unmap -1 170698 186294285 1091.37 -0.74 0.005961 +6 intel_iommu_tlb_sync -1 170646 2118537135 12414.81 385.33 0.005960 +7 cache_tag_flush_range -1 170591 1922281964 11268.37 383.70 0.005958 +8 qi_submit_sync -1 170362 1494422523 8772.04 378.01 0.005950 +9 qi_batch_flush_descs -1 340833 1807166455 5302.21 210.82 0.011903 +10 trace_qi_submit_sync_cs -1 170252 1294629687 7604.20 377.08 0.005946 +11 page_pool_put_unrefed_netmem -1 8695550 6395061571 735.44 -0.28 0.303685 +12 page_pool_put_unrefed_page -1 122 266038 2180.64 -1.07 0.000004 + +``` + +``` +------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores Run Metrics ------- +Throughput: 166.568 +CPU Util: 100.0 +Drop rate: 9.976e-07 +Acks per page: 0.013932173862926852 +Per page stats: + IOTLB Miss: 1.14558250952327 + IOTLB First Lookup: 9.777568002791028 + IOTLB All Lookups: 18.448538498145528 + IOTLB Inv: 0.09115335522866337 + IOMMU Mem Access: 2.3182612214913787 + PWT Occupancy: 971250965.412 +Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 648739 5528942156 8522.60 296.42 0.006381 +1 __iommu_map -1 648502 641657150 989.45 -0.60 0.006379 +2 intel_iommu_iotlb_sync_map -1 648270 3902221495 6019.44 295.04 0.006377 +3 cache_tag_flush_range_np -1 648045 3295997260 5086.06 295.88 0.006374 +4 iommu_flush_write_buffer -1 647803 333835119 515.33 -0.23 0.006372 +5 __iommu_unmap -1 647336 633511789 978.64 -0.54 0.006367 +6 intel_iommu_tlb_sync -1 647106 9527337524 14722.99 202.38 0.006365 +7 cache_tag_flush_range -1 646879 
8864704505 13703.81 200.82 0.006363 +8 qi_submit_sync -1 645893 6084550333 9420.37 122.24 0.006353 +9 qi_batch_flush_descs -1 1292238 7132626300 5519.59 86.35 0.012711 +10 trace_qi_submit_sync_cs -1 645454 5362290481 8307.78 121.02 0.006349 +11 page_pool_put_unrefed_netmem -1 37297328 20335614809 545.23 -0.21 0.366865 +12 page_pool_put_unrefed_page -1 775 1347659 1738.91 -1.12 0.000008 +``` + +``` +------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- +Throughput: 159.404 +CPU Util: 99.8865 +Drop rate: 2.9706e-06 +Acks per page: 0.014431629178690623 +Per page stats: + IOTLB Miss: 1.1012391911871722 + IOTLB First Lookup: 9.37129735625204 + IOTLB All Lookups: 16.570158605605883 + IOTLB Inv: 0.11572329316463828 + IOMMU Mem Access: 2.2484629998745325 + PWT Occupancy: 894057325.062 +Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 1788292 6183041804167 3.457512e+06 5.106689e+11 0.018381 +1 __iommu_map -1 1752395 2227055514 1.270860e+03 -5.300000e-01 0.018012 +2 intel_iommu_iotlb_sync_map -1 1707617 27438328576 1.606820e+04 4.706800e+02 0.017551 +3 cache_tag_flush_range_np -1 1705916 25564375458 1.498572e+04 4.691400e+02 0.017534 +4 iommu_flush_write_buffer -1 1678465 1491011350 8.883200e+02 1.700000e-01 0.017252 +5 __iommu_unmap -1 1627399 2557927152 1.571790e+03 -5.100000e-01 0.016727 +6 intel_iommu_tlb_sync -1 1570730 35766444256 2.277059e+04 2.908800e+02 0.016144 +7 cache_tag_flush_range -1 1570406 33464521935 2.130947e+04 2.929400e+02 0.016141 +8 qi_submit_sync -1 1453295 12498719171 8.600260e+03 6.011000e+01 0.014937 +9 qi_batch_flush_descs -1 2971101 15873610964 5.342670e+03 5.162000e+01 0.030538 +10 trace_qi_submit_sync_cs -1 1392530 10196588233 7.322350e+03 6.028000e+01 0.014313 +11 page_pool_put_unrefed_netmem -1 56118048 57621406170 1.026790e+03 1.900000e-01 0.576797 +12 page_pool_put_unrefed_page -1 64079 246231280081725 3.842621e+09 3.379906e+13 0.000659 +``` +``` +------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- +Throughput: 18.0 +CPU Util: 99.99735 +Drop rate: 0.0105981691 +Acks per page: 0.02008469048888889 +Per page stats: + IOTLB Miss: 1.2731322131342222 + IOTLB First Lookup: 10.490504613432888 + IOTLB All Lookups: 18.781935896348443 + IOTLB Inv: 0.10933835639466667 + IOMMU Mem Access: 2.3686680758044445 + PWT Occupancy: 214887392.647 +Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 657423 811346922685 1234132.24 4078555.49 0.059840 +1 __iommu_map -1 629085 2667726698 4240.65 -0.39 0.057261 +2 intel_iommu_iotlb_sync_map -1 604227 752131135238 1244782.40 4167118.65 0.054998 +3 cache_tag_flush_range_np -1 580133 724976161891 1249672.34 4207074.99 0.052805 +4 iommu_flush_write_buffer -1 558943 2076529419 3715.10 3.58 0.050876 +5 __iommu_unmap -1 499915 2036478252 4073.65 -0.81 0.045503 +6 intel_iommu_tlb_sync -1 473102 360266895752 761499.41 2695061.66 0.043063 +7 cache_tag_flush_range -1 450158 344191430438 764601.39 2727268.43 0.040974 +8 qi_submit_sync -1 359073 
27308442500 76052.62 2515.41 0.032684 +9 qi_batch_flush_descs -1 752523 31516145928 41880.64 2880.87 0.068496 +10 trace_qi_submit_sync_cs -1 318545 22430274841 70414.78 2367.43 0.028995 +11 page_pool_put_unrefed_netmem -1 12757594 26657209578 2089.52 -1.24 1.161225 +12 page_pool_put_unrefed_page -1 1239 11756313 9488.55 -1.85 0.000113 +``` + diff --git a/utils/reports/no_ebpf.md b/utils/reports/no_ebpf.md new file mode 100644 index 000000000..6cfbcc13c --- /dev/null +++ b/utils/reports/no_ebpf.md @@ -0,0 +1,90 @@ + +Nested, no ebpf tracing +1 flow, 1 core +``` +2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores +------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores Run Metrics ------- +Throughput: 43.481 +CPU Util: 72.67 +Drop rate: 1.25985e-05 +Acks per page: 0.018491149476782962 +Per page stats: + IOTLB Miss: 0.9459983733393436 + IOTLB First Lookup: 10.262835799972954 + IOTLB All Lookups: 16.873251969221684 + IOTLB Inv: 0.10161880412759597 + IOMMU Mem Access: 1.9011407753588005 + PWT Occupancy: 205601686.882 +``` + +4 flow, 4 core + +``` +2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores +------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores Run Metrics ------- +Throughput: 165.123 +CPU Util: 93.451375 +Drop rate: 9.771e-07 +Acks per page: 0.015494842821411916 +Per page stats: + IOTLB Miss: 1.1755971450774514 + IOTLB First Lookup: 9.868652860893468 + IOTLB All Lookups: 18.487392099760978 + IOTLB Inv: 0.09163031913053905 + IOMMU Mem Access: 2.360826030421637 + PWT Occupancy: 955914720.765 +``` + +8 flow, 8 core + +``` +2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores +------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- +Throughput: 229.186 +CPU Util: 99.2229375 +Drop rate: 2.0139e-06 +Acks per page: 0.017781929381375825 +Per page stats: + IOTLB Miss: 1.125802439877619 + IOTLB First Lookup: 9.977624758025778 + IOTLB All Lookups: 18.427204986125716 + IOTLB Inv: 0.11165543759608353 + IOMMU Mem Access: 2.386134777764924 + PWT Occupancy: 1371796173.0 +``` + +16 flow, 16 core + +``` +2025-10-03-17-24-12-6.12.9-iommufd-extra-hooks-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores +------- 2025-10-03-17-24-12-6.12.9-iommufd-extra-hooks-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores Run Metrics ------- +Throughput: 25.29 +CPU Util: 99.99375 +Drop rate: 0.0018222967 +Acks per page: 0.01729804450771056 +Per page stats: + IOTLB Miss: 1.1352454320132859 + IOTLB First Lookup: 10.149211487860814 + IOTLB All Lookups: 19.087077295991143 + IOTLB Inv: 0.10629719191838673 + IOMMU Mem Access: 2.340058162450929 + PWT Occupancy: 314648489.688 +``` + +20 flows 20 cores +``` +2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores +------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- +Throughput: 25.325 +CPU Util: 100.0 +Drop rate: 0.0049857471 +Acks per page: 0.017037289761105627 +Per page stats: + IOTLB Miss: 1.1891126457189338 + IOTLB First Lookup: 10.070045222068943 + IOTLB All Lookups: 19.148399357017965 + IOTLB Inv: 
0.10229347197599209 + IOMMU Mem Access: 2.3235780622556366 + PWT Occupancy: 299483021.235 +``` + From d252d065e7ca9c854cf95515a34c8545687fe6a5 Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Fri, 3 Oct 2025 17:59:53 -0500 Subject: [PATCH 04/18] Nit: add flow info --- utils/reports/ebpf.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/utils/reports/ebpf.md b/utils/reports/ebpf.md index 4ca4b4fb5..82edc7caa 100644 --- a/utils/reports/ebpf.md +++ b/utils/reports/ebpf.md @@ -1,5 +1,6 @@ Usage: python script.py +1 flow & core ``` ------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores Run Metrics ------- Throughput: 46.913 @@ -31,6 +32,7 @@ Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extr ``` +4 flow & core ``` ------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores Run Metrics ------- Throughput: 166.568 @@ -61,6 +63,7 @@ Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extr 12 page_pool_put_unrefed_page -1 775 1347659 1738.91 -1.12 0.000008 ``` +8 flow & core ``` ------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- Throughput: 159.404 @@ -90,6 +93,8 @@ Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extr 11 page_pool_put_unrefed_netmem -1 56118048 57621406170 1.026790e+03 1.900000e-01 0.576797 12 page_pool_put_unrefed_page -1 64079 246231280081725 3.842621e+09 3.379906e+13 0.000659 ``` + +20 flow & core ``` ------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- Throughput: 18.0 From 0c421be52bd1a4069dae6f0600c0074e7131392a Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Mon, 6 Oct 2025 20:49:24 -0500 Subject: [PATCH 05/18] results --- results_summary/ebpf.md | 157 ++++++++++++++++++++++++++++++ results_summary/no-writel.md | 184 +++++++++++++++++++++++++++++++++++ results_summary/no_ebpf.md | 90 +++++++++++++++++ 3 files changed, 431 insertions(+) create mode 100644 results_summary/ebpf.md create mode 100644 results_summary/no-writel.md create mode 100644 results_summary/no_ebpf.md diff --git a/results_summary/ebpf.md b/results_summary/ebpf.md new file mode 100644 index 000000000..c2fb034f2 --- /dev/null +++ b/results_summary/ebpf.md @@ -0,0 +1,157 @@ +Usage: python script.py + +1 flow & core +``` +------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores Run Metrics ------- +Throughput: 46.913 +CPU Util: 100.0 +Drop rate: 6.9598e-06 +Acks per page: 0.011157171172169762 +Per page stats: + IOTLB Miss: 1.0182934255241831 + IOTLB First Lookup: 9.992788985910089 + IOTLB All Lookups: 17.35123975836122 + IOTLB Inv: 0.07360253473450856 + IOMMU Mem Access: 1.9066415944407733 + PWT Occupancy: 221906394.188 +Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 171036 1139456425 6662.09 -4.24 0.005973 +1 __iommu_map -1 170998 185684984 1085.89 -0.69 0.005972 +2 intel_iommu_iotlb_sync_map -1 170922 686091943 4014.06 -3.05 0.005969 +3 cache_tag_flush_range_np -1 170866 498774037 2919.09 -2.03 0.005967 +4 iommu_flush_write_buffer -1 
170811 118642694 694.58 -0.26 0.005965 +5 __iommu_unmap -1 170698 186294285 1091.37 -0.74 0.005961 +6 intel_iommu_tlb_sync -1 170646 2118537135 12414.81 385.33 0.005960 +7 cache_tag_flush_range -1 170591 1922281964 11268.37 383.70 0.005958 +8 qi_submit_sync -1 170362 1494422523 8772.04 378.01 0.005950 +9 qi_batch_flush_descs -1 340833 1807166455 5302.21 210.82 0.011903 +10 trace_qi_submit_sync_cs -1 170252 1294629687 7604.20 377.08 0.005946 +11 page_pool_put_unrefed_netmem -1 8695550 6395061571 735.44 -0.28 0.303685 +12 page_pool_put_unrefed_page -1 122 266038 2180.64 -1.07 0.000004 + +``` + +4 flow & core +``` +------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores Run Metrics ------- +Throughput: 166.568 +CPU Util: 100.0 +Drop rate: 9.976e-07 +Acks per page: 0.013932173862926852 +Per page stats: + IOTLB Miss: 1.14558250952327 + IOTLB First Lookup: 9.777568002791028 + IOTLB All Lookups: 18.448538498145528 + IOTLB Inv: 0.09115335522866337 + IOMMU Mem Access: 2.3182612214913787 + PWT Occupancy: 971250965.412 +Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 648739 5528942156 8522.60 296.42 0.006381 +1 __iommu_map -1 648502 641657150 989.45 -0.60 0.006379 +2 intel_iommu_iotlb_sync_map -1 648270 3902221495 6019.44 295.04 0.006377 +3 cache_tag_flush_range_np -1 648045 3295997260 5086.06 295.88 0.006374 +4 iommu_flush_write_buffer -1 647803 333835119 515.33 -0.23 0.006372 +5 __iommu_unmap -1 647336 633511789 978.64 -0.54 0.006367 +6 intel_iommu_tlb_sync -1 647106 9527337524 14722.99 202.38 0.006365 +7 cache_tag_flush_range -1 646879 8864704505 13703.81 200.82 0.006363 +8 qi_submit_sync -1 645893 6084550333 9420.37 122.24 0.006353 +9 qi_batch_flush_descs -1 1292238 7132626300 5519.59 86.35 0.012711 +10 trace_qi_submit_sync_cs -1 645454 5362290481 8307.78 121.02 0.006349 +11 page_pool_put_unrefed_netmem -1 37297328 20335614809 545.23 -0.21 0.366865 +12 page_pool_put_unrefed_page -1 775 1347659 1738.91 -1.12 0.000008 +``` + +8 flow & core +``` +------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- +Throughput: 159.404 +CPU Util: 99.8865 +Drop rate: 2.9706e-06 +Acks per page: 0.014431629178690623 +Per page stats: + IOTLB Miss: 1.1012391911871722 + IOTLB First Lookup: 9.37129735625204 + IOTLB All Lookups: 16.570158605605883 + IOTLB Inv: 0.11572329316463828 + IOMMU Mem Access: 2.2484629998745325 + PWT Occupancy: 894057325.062 +Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 1788292 6183041804167 3.457512e+06 5.106689e+11 0.018381 +1 __iommu_map -1 1752395 2227055514 1.270860e+03 -5.300000e-01 0.018012 +2 intel_iommu_iotlb_sync_map -1 1707617 27438328576 1.606820e+04 4.706800e+02 0.017551 +3 cache_tag_flush_range_np -1 1705916 25564375458 1.498572e+04 4.691400e+02 0.017534 +4 iommu_flush_write_buffer -1 1678465 1491011350 8.883200e+02 1.700000e-01 0.017252 +5 __iommu_unmap -1 1627399 2557927152 1.571790e+03 -5.100000e-01 0.016727 +6 intel_iommu_tlb_sync -1 1570730 35766444256 2.277059e+04 2.908800e+02 0.016144 +7 
cache_tag_flush_range -1 1570406 33464521935 2.130947e+04 2.929400e+02 0.016141 +8 qi_submit_sync -1 1453295 12498719171 8.600260e+03 6.011000e+01 0.014937 +9 qi_batch_flush_descs -1 2971101 15873610964 5.342670e+03 5.162000e+01 0.030538 +10 trace_qi_submit_sync_cs -1 1392530 10196588233 7.322350e+03 6.028000e+01 0.014313 +11 page_pool_put_unrefed_netmem -1 56118048 57621406170 1.026790e+03 1.900000e-01 0.576797 +12 page_pool_put_unrefed_page -1 64079 246231280081725 3.842621e+09 3.379906e+13 0.000659 +``` +16 flows and 16 cores +``` +------- 2025-10-06-12-26-40-6.12.9-iommufd-extra-hooks-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores Run Metrics ------- +Throughput: 22.923 +CPU Util: 100.0 +Drop rate: 0.0017054154 +Acks per page: 0.016549748357544827 +Per page stats: + IOTLB Miss: 1.2494411740173625 + IOTLB First Lookup: 10.822018943436024 + IOTLB All Lookups: 20.935928685790863 + IOTLB Inv: 0.10902082135846093 + IOMMU Mem Access: 2.5122845386561967 + PWT Occupancy: 292633867.938 +Reading eBPF stats from ../utils/reports/2025-10-06-12-26-40-6.12.9-iommufd-extra-hooks-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 141280 165511298028 1171512.59 2849190.97 0.010098 +1 __iommu_map -1 141089 669363924 4744.27 0.28 0.010084 +2 intel_iommu_iotlb_sync_map -1 140911 163506242740 1160351.16 2849650.36 0.010071 +3 cache_tag_flush_range_np -1 140712 162640296791 1155838.14 2847809.43 0.010057 +4 iommu_flush_write_buffer -1 140510 549611875 3911.55 4.07 0.010043 +5 __iommu_unmap -1 140126 640535257 4571.14 0.73 0.010015 +6 intel_iommu_tlb_sync -1 139960 99460641432 710636.19 1806841.42 0.010004 +7 cache_tag_flush_range -1 139758 98606857659 705554.30 1806076.71 0.009989 +8 qi_submit_sync -1 139074 12812886972 92130.00 2863.00 0.009940 +9 qi_batch_flush_descs -1 278472 14038507279 50412.63 3808.88 0.019904 +10 trace_qi_submit_sync_cs -1 138739 11813025979 85145.68 2737.90 0.009916 +11 page_pool_put_unrefed_netmem -1 7701347 16226888883 2107.02 -1.12 0.550447 +12 page_pool_put_unrefed_page -1 243 2766926 11386.53 7.58 0.000017 +``` + +20 flow & core +``` +------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- +Throughput: 18.0 +CPU Util: 99.99735 +Drop rate: 0.0105981691 +Acks per page: 0.02008469048888889 +Per page stats: + IOTLB Miss: 1.2731322131342222 + IOTLB First Lookup: 10.490504613432888 + IOTLB All Lookups: 18.781935896348443 + IOTLB Inv: 0.10933835639466667 + IOMMU Mem Access: 2.3686680758044445 + PWT Occupancy: 214887392.647 +Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 657423 811346922685 1234132.24 4078555.49 0.059840 +1 __iommu_map -1 629085 2667726698 4240.65 -0.39 0.057261 +2 intel_iommu_iotlb_sync_map -1 604227 752131135238 1244782.40 4167118.65 0.054998 +3 cache_tag_flush_range_np -1 580133 724976161891 1249672.34 4207074.99 0.052805 +4 iommu_flush_write_buffer -1 558943 2076529419 3715.10 3.58 0.050876 +5 __iommu_unmap -1 499915 2036478252 4073.65 -0.81 0.045503 +6 intel_iommu_tlb_sync -1 473102 360266895752 761499.41 2695061.66 0.043063 +7 cache_tag_flush_range -1 450158 344191430438 764601.39 2727268.43 0.040974 +8 
qi_submit_sync -1 359073 27308442500 76052.62 2515.41 0.032684 +9 qi_batch_flush_descs -1 752523 31516145928 41880.64 2880.87 0.068496 +10 trace_qi_submit_sync_cs -1 318545 22430274841 70414.78 2367.43 0.028995 +11 page_pool_put_unrefed_netmem -1 12757594 26657209578 2089.52 -1.24 1.161225 +12 page_pool_put_unrefed_page -1 1239 11756313 9488.55 -1.85 0.000113 +``` + diff --git a/results_summary/no-writel.md b/results_summary/no-writel.md new file mode 100644 index 000000000..55eb6a72d --- /dev/null +++ b/results_summary/no-writel.md @@ -0,0 +1,184 @@ + + +## Skip writel inside qi_submit_sync +``` +------- 2025-10-06-19-38-52-6.12.9-iommufd-no-writel-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- +Throughput: 221.323 +CPU Util: 99.9435 +Drop rate: 2.16e-06 +Acks per page: 0.016818608357920324 +Per page stats: + IOTLB Miss: 1.193588035875169 + IOTLB First Lookup: 10.154951503097283 + IOTLB All Lookups: 18.511458759769134 + IOTLB Inv: 0.10790433717236798 + IOMMU Mem Access: 2.46306754547878 + PWT Occupancy: 1327972808.688 +Reading eBPF stats from ../utils/reports/2025-10-06-19-38-52-6.12.9-iommufd-no-writel-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 1047815 23282950277 22220.48 6.153900e+02 0.007757 +1 __iommu_map -1 1047503 1331851370 1271.45 -5.200000e-01 0.007754 +2 intel_iommu_iotlb_sync_map -1 1047164 19863291284 18968.65 6.142500e+02 0.007752 +3 cache_tag_flush_range_np -1 1046841 18529973119 17700.85 6.150900e+02 0.007750 +4 iommu_flush_write_buffer -1 1046515 750177138 716.83 -1.000000e-01 0.007747 +5 __iommu_unmap -1 1045850 1659236044 1586.50 -6.100000e-01 0.007742 +6 intel_iommu_tlb_sync -1 1045530 29404222953 28123.75 3.974000e+02 0.007740 +7 cache_tag_flush_range -1 1045243 27779604559 26577.17 3.964100e+02 0.007738 +8 qi_submit_sync -1 1043958 12048701206 11541.37 8.735000e+01 0.007728 +9 qi_batch_flush_descs -1 2088451 14317708334 6855.66 8.300000e+01 0.015460 +10 trace_qi_submit_sync_cs -1 1043377 10693274521 10248.72 8.539000e+01 0.007724 +11 page_pool_put_unrefed_netmem -1 47632317 42919767274 901.06 2.200000e-01 0.352610 +12 page_pool_put_unrefed_page -1 59655 2273501167146 38110823.35 2.165818e+13 0.000442 +``` + +``` +------- 2025-10-06-20-29-06-6.12.9-iommufd-no-writel-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores Run Metrics ------- +Throughput: 344.445 +CPU Util: 88.65359375 +Drop rate: 0.0006922006 +Acks per page: 0.04947615057498294 +Per page stats: + IOTLB Miss: 1.1823682853053463 + IOTLB First Lookup: 10.051158940975725 + IOTLB All Lookups: 18.71930524478509 + IOTLB Inv: 0.0 + IOMMU Mem Access: 2.3886559346902527 + PWT Occupancy: 2404329229.812 +Reading eBPF stats from ../utils/reports/2025-10-06-20-29-06-6.12.9-iommufd-no-writel-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 1812756 33672264078 18575.18 7.445000e+01 0.008623 +1 __iommu_map -1 1810810 3166224794 1748.51 -3.300000e-01 0.008613 +2 intel_iommu_iotlb_sync_map -1 1809203 25750148415 14232.87 6.754000e+01 0.008606 +3 cache_tag_flush_range_np -1 1807744 22765040935 12593.07 6.628000e+01 0.008599 +4 iommu_flush_write_buffer -1 1806428 2277398114 1260.72 5.800000e-01 0.008593 +5 __iommu_unmap -1 1804470 3720187399 2061.65 -8.000000e-02 0.008583 +6 intel_iommu_tlb_sync -1 1803452 
29789340349 16517.96 7.166000e+01 0.008578 +7 cache_tag_flush_range -1 1802531 26262353258 14569.71 7.034000e+01 0.008574 +8 qi_submit_sync -1 1798525 4777184118 2656.17 2.800000e-01 0.008555 +9 qi_batch_flush_descs -1 3598571 10213020018 2838.08 3.900000e+00 0.017117 +10 trace_qi_submit_sync_cs -1 1797013 1666616643 927.44 1.000000e-01 0.008548 +11 page_pool_put_unrefed_netmem -1 75371077 103532980185 1373.64 4.700000e-01 0.358513 +12 page_pool_put_unrefed_page -1 286598 5549429177935 19363112.02 3.359202e+12 0.001363 +``` + +``` +------- 2025-10-06-20-33-59-6.12.9-iommufd-no-writel-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- +Throughput: 273.807 +CPU Util: 88.6303 +Drop rate: 0.0008421404 +Acks per page: 0.027850167138166664 +Per page stats: + IOTLB Miss: 1.1876182486481939 + IOTLB First Lookup: 10.057482141626705 + IOTLB All Lookups: 16.625878772712163 + IOTLB Inv: 0.0 + IOMMU Mem Access: 2.371095155346649 + PWT Occupancy: 1754318361.312 +Reading eBPF stats from ../utils/reports/2025-10-06-20-33-59-6.12.9-iommufd-no-writel-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 3132235 70782379754734 22598042.53 4.326659e+12 1.874260e-02 +1 __iommu_map -1 3128350 6072035322 1940.97 -8.000000e-02 1.871935e-02 +2 intel_iommu_iotlb_sync_map -1 3125110 18683662375656 5978561.51 2.691555e+12 1.869996e-02 +3 cache_tag_flush_range_np -1 3122556 11432105531267 3661137.07 1.642538e+12 1.868468e-02 +4 iommu_flush_write_buffer -1 3119714 4732753253 1517.05 3.700000e-01 1.866767e-02 +5 __iommu_unmap -1 3107225 6504158151 2093.24 3.100000e-01 1.859294e-02 +6 intel_iommu_tlb_sync -1 2933930 571113353457 194658.14 6.992730e+10 1.755598e-02 +7 cache_tag_flush_range -1 2830536 565955611454 199946.45 7.248161e+10 1.693730e-02 +8 page_pool_alloc_pages_slow -1 10 85974405 8597440.50 6.412534e+06 5.983777e-08 +9 qi_submit_sync -1 2517698 8268650461 3284.21 2.150000e+00 1.506534e-02 +10 qi_batch_flush_descs -1 5226719 17182429639 3287.42 8.630000e+00 3.127552e-02 +11 trace_qi_submit_sync_cs -1 2425961 3092718584 1274.84 4.400000e-01 1.451641e-02 +12 page_pool_dma_map -1 576 83552389 145056.23 2.081800e+02 3.446655e-06 +13 page_pool_put_unrefed_netmem -1 94866517 97957354275 1032.58 3.000000e-01 5.676601e-01 +14 page_pool_put_unrefed_page -1 17649 31020915 1757.66 7.000000e-02 1.056077e-04 +``` + + +## Skip writel by completely skipping adding inv desc to the queue +``` +------- 2025-10-06-19-20-34-6.12.9-iommufd-no-writel-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- +Throughput: 205.745 +CPU Util: 18.4179375 +Drop rate: 0.0002968918 +Acks per page: 0.024417714370701595 +Per page stats: + IOTLB Miss: 1.109332706970201 + IOTLB First Lookup: 9.793780529278864 + IOTLB All Lookups: 18.30769636900046 + IOTLB Inv: 0.0 + IOMMU Mem Access: 2.0692728206013467 + PWT Occupancy: 1167869442.235 +Reading eBPF stats from ../utils/reports/2025-10-06-19-20-34-6.12.9-iommufd-no-writel-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 51094 2535634400119 49626852.47 4.192656e+13 4.068746e-04 +1 __iommu_map -1 51117 86530820 1692.80 -1.010000e+00 4.070577e-04 +2 intel_iommu_iotlb_sync_map -1 51105 845660382643 16547507.73 1.398393e+13 4.069622e-04 +3 cache_tag_flush_range_np -1 51106 
204407715 3999.68 -3.690000e+00 4.069701e-04 +4 iommu_flush_write_buffer -1 51111 41164866 805.40 -5.200000e-01 4.070100e-04 +5 __iommu_unmap -1 50215 66860701 1331.49 -1.200000e-01 3.998749e-04 +6 intel_iommu_tlb_sync -1 50214 198388710 3950.86 -4.300000e+00 3.998669e-04 +7 cache_tag_flush_range -1 50214 121889266 2427.40 -1.060000e+00 3.998669e-04 +8 page_pool_alloc_pages_slow -1 14 27952081 1996577.21 2.951799e+05 1.114856e-07 +9 qi_batch_flush_descs -1 101332 79839949 787.90 -5.300000e-01 8.069326e-04 +10 page_pool_dma_map -1 896 25555232 28521.46 2.094390e+03 7.135077e-06 +11 page_pool_put_unrefed_netmem -1 2526972 2140852256 847.20 -3.600000e-01 2.012292e-02 +12 page_pool_put_unrefed_page -1 303 1943751 6415.02 6.960000e+00 2.412866e-06 +``` + +``` +------- 2025-10-06-19-09-36-6.12.9-iommufd-no-writel-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores Run Metrics ------- +Throughput: 278.491 +CPU Util: 99.8200625 +Drop rate: 0.0007358606 +Acks per page: 0.04088521947782873 +Per page stats: + IOTLB Miss: 1.3003971434049932 + IOTLB First Lookup: 11.068191231112015 + IOTLB All Lookups: 19.45128046858109 + IOTLB Inv: 0.0 + IOMMU Mem Access: 2.6470694707678737 + PWT Occupancy: 2025183414.0 +Reading eBPF stats from ../utils/reports/2025-10-06-19-09-36-6.12.9-iommufd-no-writel-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 3329559 45403674725 13636.54 2.746000e+01 0.019588 +1 __iommu_map -1 3325899 6733381585 2024.53 -7.300000e-01 0.019567 +2 intel_iommu_iotlb_sync_map -1 3322760 27883185261 8391.57 2.163000e+01 0.019548 +3 cache_tag_flush_range_np -1 3319157 21978534541 6621.72 2.141000e+01 0.019527 +4 iommu_flush_write_buffer -1 3315799 3343987221 1008.50 8.000000e-02 0.019507 +5 __iommu_unmap -1 3249278 7903708504 2432.45 -1.290000e+00 0.019116 +6 intel_iommu_tlb_sync -1 3246832 26718261701 8229.03 2.330000e+01 0.019102 +7 cache_tag_flush_range -1 3242299 19393264953 5981.33 2.494000e+01 0.019075 +8 qi_batch_flush_descs -1 5800249 5479981052 944.78 1.300000e-01 0.034124 +9 page_pool_put_unrefed_netmem -1 92422315 169904140855 1838.35 -2.500000e-01 0.543733 +10 page_pool_put_unrefed_page -1 1331792 4311900421260 3237668.06 6.647313e+11 0.007835 +``` + +``` +------- 2025-10-06-19-14-32-6.12.9-iommufd-no-writel-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- +Throughput: 336.996 +CPU Util: 99.9726 +Drop rate: 0.0041669753 +Acks per page: 0.046164218162826856 +Per page stats: + IOTLB Miss: 1.2341563189117972 + IOTLB First Lookup: 10.724418027914776 + IOTLB All Lookups: 15.919911099616042 + IOTLB Inv: 0.0 + IOMMU Mem Access: 2.459357086910278 + PWT Occupancy: 2153650287.471 +Reading eBPF stats from ../utils/reports/2025-10-06-19-14-32-6.12.9-iommufd-no-writel-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 2607288 36804723226 14116.09 2.065000e+01 1.267606e-02 +1 __iommu_map -1 2605760 4287388675 1645.35 -4.700000e-01 1.266863e-02 +2 intel_iommu_iotlb_sync_map -1 2604083 24999873800 9600.26 1.854000e+01 1.266048e-02 +3 cache_tag_flush_range_np -1 2602529 21020720067 8077.04 1.790000e+01 1.265292e-02 +4 iommu_flush_write_buffer -1 2601087 2746740823 1056.00 -3.000000e-02 1.264591e-02 +5 __iommu_unmap -1 2595622 5441798798 2096.53 -4.400000e-01 1.261934e-02 +6 
intel_iommu_tlb_sync -1 2594481 23519480632 9065.20 1.952000e+01 1.261379e-02 +7 cache_tag_flush_range -1 2593531 18116546724 6985.28 2.009000e+01 1.260917e-02 +8 page_pool_alloc_pages_slow -1 45 87239310 1938651.33 4.671771e+05 2.187800e-07 +9 qi_batch_flush_descs -1 5183264 5424998540 1046.64 7.000000e-02 2.519988e-02 +10 page_pool_dma_map -1 2880 80664075 28008.36 1.181200e+02 1.400192e-05 +11 page_pool_put_unrefed_netmem -1 108245673 162471196035 1500.95 2.400000e-01 5.262665e-01 +12 page_pool_put_unrefed_page -1 163198 12157466225118 74495191.27 3.622500e+13 7.934326e-04 +``` \ No newline at end of file diff --git a/results_summary/no_ebpf.md b/results_summary/no_ebpf.md new file mode 100644 index 000000000..6cfbcc13c --- /dev/null +++ b/results_summary/no_ebpf.md @@ -0,0 +1,90 @@ + +Nested, no ebpf tracing +1 flow, 1 core +``` +2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores +------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores Run Metrics ------- +Throughput: 43.481 +CPU Util: 72.67 +Drop rate: 1.25985e-05 +Acks per page: 0.018491149476782962 +Per page stats: + IOTLB Miss: 0.9459983733393436 + IOTLB First Lookup: 10.262835799972954 + IOTLB All Lookups: 16.873251969221684 + IOTLB Inv: 0.10161880412759597 + IOMMU Mem Access: 1.9011407753588005 + PWT Occupancy: 205601686.882 +``` + +4 flow, 4 core + +``` +2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores +------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores Run Metrics ------- +Throughput: 165.123 +CPU Util: 93.451375 +Drop rate: 9.771e-07 +Acks per page: 0.015494842821411916 +Per page stats: + IOTLB Miss: 1.1755971450774514 + IOTLB First Lookup: 9.868652860893468 + IOTLB All Lookups: 18.487392099760978 + IOTLB Inv: 0.09163031913053905 + IOMMU Mem Access: 2.360826030421637 + PWT Occupancy: 955914720.765 +``` + +8 flow, 8 core + +``` +2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores +------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- +Throughput: 229.186 +CPU Util: 99.2229375 +Drop rate: 2.0139e-06 +Acks per page: 0.017781929381375825 +Per page stats: + IOTLB Miss: 1.125802439877619 + IOTLB First Lookup: 9.977624758025778 + IOTLB All Lookups: 18.427204986125716 + IOTLB Inv: 0.11165543759608353 + IOMMU Mem Access: 2.386134777764924 + PWT Occupancy: 1371796173.0 +``` + +16 flow, 16 core + +``` +2025-10-03-17-24-12-6.12.9-iommufd-extra-hooks-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores +------- 2025-10-03-17-24-12-6.12.9-iommufd-extra-hooks-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores Run Metrics ------- +Throughput: 25.29 +CPU Util: 99.99375 +Drop rate: 0.0018222967 +Acks per page: 0.01729804450771056 +Per page stats: + IOTLB Miss: 1.1352454320132859 + IOTLB First Lookup: 10.149211487860814 + IOTLB All Lookups: 19.087077295991143 + IOTLB Inv: 0.10629719191838673 + IOMMU Mem Access: 2.340058162450929 + PWT Occupancy: 314648489.688 +``` + +20 flows 20 cores +``` +2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores +------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores 
Run Metrics ------- +Throughput: 25.325 +CPU Util: 100.0 +Drop rate: 0.0049857471 +Acks per page: 0.017037289761105627 +Per page stats: + IOTLB Miss: 1.1891126457189338 + IOTLB First Lookup: 10.070045222068943 + IOTLB All Lookups: 19.148399357017965 + IOTLB Inv: 0.10229347197599209 + IOMMU Mem Access: 2.3235780622556366 + PWT Occupancy: 299483021.235 +``` + From 070e461ae9207798e74dfa820ee654c910c437e0 Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Mon, 13 Oct 2025 02:53:04 -0500 Subject: [PATCH 06/18] no map contention resutls --- results_summary/no_map_contention.md | 149 +++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 results_summary/no_map_contention.md diff --git a/results_summary/no_map_contention.md b/results_summary/no_map_contention.md new file mode 100644 index 000000000..949d1b277 --- /dev/null +++ b/results_summary/no_map_contention.md @@ -0,0 +1,149 @@ + +``` +2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores +------- 2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores Run Metrics ------- +Throughput: 33.777 +CPU Util: 100.0 +Drop rate: 1.13766e-05 +Acks per page: 0.030273219859667824 +Per page stats: + IOTLB Miss: 0.6100415164553394 + IOTLB First Lookup: 9.274437391044557 + IOTLB All Lookups: 13.4962224235957 + IOTLB Inv: 0.19150809643579952 + IOMMU Mem Access: 1.2722873676965984 + PWT Occupancy: 107436801.353 +Reading eBPF stats from ../utils/reports/2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 185965 640924744 3446.48 0.35 0.009020 +1 __iommu_map -1 185923 154941223 833.36 -0.45 0.009018 +2 intel_iommu_iotlb_sync_map -1 185853 246528053 1326.47 -0.32 0.009015 +3 cache_tag_flush_range_np -1 185791 88213903 474.80 -0.21 0.009012 +4 __iommu_unmap -1 185587 182778932 984.87 -0.41 0.009002 +5 intel_iommu_tlb_sync -1 185527 2242903899 12089.37 355.10 0.008999 +6 cache_tag_flush_range -1 185469 2055926487 11085.01 353.62 0.008996 +7 qi_submit_sync -1 185214 1609447364 8689.66 346.07 0.008984 +8 qi_batch_flush_descs -1 185283 1791294457 9667.88 346.72 0.008987 +9 trace_qi_submit_sync_cs -1 185095 1430565528 7728.82 345.51 0.008978 +10 page_pool_put_unrefed_netmem -1 10875935 5188747098 477.09 -0.16 0.527552 +11 page_pool_put_unrefed_page -1 322 490917 1524.59 -0.86 0.000016 +``` + +``` +2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores +------- 2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores Run Metrics ------- +Throughput: 153.819 +CPU Util: 99.97525 +Drop rate: 1.0829e-06 +Acks per page: 0.017175458220375896 +Per page stats: + IOTLB Miss: 1.1499140474453742 + IOTLB First Lookup: 9.811289727068827 + IOTLB All Lookups: 18.258072435992954 + IOTLB Inv: 0.11088180014172501 + IOMMU Mem Access: 2.3453380875964607 + PWT Occupancy: 880284529.062 +Reading eBPF stats from ../utils/reports/2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 722850 2635303813 3645.71 0.25 0.007699 +1 __iommu_map -1 722652 677524944 937.55 
-0.54 0.007697 +2 intel_iommu_iotlb_sync_map -1 722426 986897501 1366.09 -0.33 0.007695 +3 cache_tag_flush_range_np -1 722210 361248452 500.20 -0.22 0.007693 +4 __iommu_unmap -1 721489 644919374 893.87 -0.48 0.007685 +5 intel_iommu_tlb_sync -1 721270 11242682556 15587.34 566.26 0.007683 +6 cache_tag_flush_range -1 721054 10572805730 14662.99 563.98 0.007680 +7 qi_submit_sync -1 720078 6945888032 9646.02 144.09 0.007670 +8 qi_batch_flush_descs -1 720285 7646620349 10616.10 147.27 0.007672 +9 trace_qi_submit_sync_cs -1 719617 6200236332 8616.02 142.65 0.007665 +10 page_pool_put_unrefed_netmem -1 40070637 19987890456 498.82 -0.18 0.426812 +11 page_pool_put_unrefed_page -1 6022 7852840 1304.03 -0.61 0.000064 +``` +``` +2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores +------- 2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- +Throughput: 252.115 +CPU Util: 99.98775 +Drop rate: 6.649e-07 +Acks per page: 0.016678686497828374 +Per page stats: + IOTLB Miss: 1.1757468204891262 + IOTLB First Lookup: 9.735756925468076 + IOTLB All Lookups: 18.046871112246397 + IOTLB Inv: 0.1095388821133213 + IOMMU Mem Access: 2.391131853955536 + PWT Occupancy: 1533534976.375 +Reading eBPF stats from ../utils/reports/2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 1092967 7100356604 6.496410e+03 2.270000e+00 0.007103 +1 __iommu_map -1 1092570 1736365257 1.589250e+03 -6.600000e-01 0.007100 +2 intel_iommu_iotlb_sync_map -1 1092176 2860565448 2.619140e+03 -9.000000e-02 0.007098 +3 cache_tag_flush_range_np -1 1091771 1190819447 1.090720e+03 1.500000e-01 0.007095 +4 __iommu_unmap -1 1090475 1994403509 1.828930e+03 -8.500000e-01 0.007087 +5 intel_iommu_tlb_sync -1 1090061 30123622708 2.763480e+04 1.028200e+03 0.007084 +6 cache_tag_flush_range -1 1089638 28168127312 2.585090e+04 1.032220e+03 0.007081 +7 qi_submit_sync -1 1087853 11577580347 1.064260e+04 1.053200e+02 0.007070 +8 qi_batch_flush_descs -1 1088263 13630305155 1.252483e+04 1.047500e+02 0.007072 +9 trace_qi_submit_sync_cs -1 1087121 9832391614 9.044430e+03 1.046100e+02 0.007065 +10 page_pool_put_unrefed_netmem -1 47688962 59111728008 1.239530e+03 3.600000e-01 0.309913 +11 page_pool_put_unrefed_page -1 48911 5586076335011 1.142090e+08 1.063145e+14 0.000318 +``` + +``` +2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores +------- 2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores Run Metrics ------- +Throughput: 32.966 +CPU Util: 100.0 +Drop rate: 0.0033811853 +Acks per page: 0.017448815870897288 +Per page stats: + IOTLB Miss: 1.162521051253291 + IOTLB First Lookup: 9.973923076612753 + IOTLB All Lookups: 18.512692914639324 + IOTLB Inv: 0.09580553492640902 + IOMMU Mem Access: 2.338268557908148 + PWT Occupancy: 373632677.125 +Reading eBPF stats from ../utils/reports/2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 977974 16209789238 16574.87 18.81 0.048605 +1 __iommu_map -1 941659 3587684102 3809.96 -0.08 0.046800 
+2 intel_iommu_iotlb_sync_map -1 908413 5957618295 6558.27 -1.48 0.045148 +3 cache_tag_flush_range_np -1 882626 2304886770 2611.40 0.21 0.043866 +4 __iommu_unmap -1 776352 2611790623 3364.18 -0.38 0.038584 +5 intel_iommu_tlb_sync -1 744850 576992512879 774642.56 1626457.60 0.037019 +6 cache_tag_flush_range -1 708345 552500163230 779987.38 1649900.32 0.035205 +7 qi_submit_sync -1 560644 32970628092 58808.49 2006.08 0.027864 +8 qi_batch_flush_descs -1 590359 37750470975 63944.94 2183.71 0.029341 +9 trace_qi_submit_sync_cs -1 497435 26742357382 53760.51 1873.72 0.024722 +10 page_pool_put_unrefed_netmem -1 22522391 42723491010 1896.93 -1.35 1.119356 +11 page_pool_put_unrefed_page -1 308 2646665 8593.07 2.52 0.000015 +``` + +``` +2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores +------- 2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- +Throughput: 36.873 +CPU Util: 99.996875 +Drop rate: 0.0219449664 +Acks per page: 0.017521011298239905 +Per page stats: + IOTLB Miss: 1.204611738226236 + IOTLB First Lookup: 10.431303739429936 + IOTLB All Lookups: 19.380203838255635 + IOTLB Inv: 0.10309438602771676 + IOMMU Mem Access: 2.46238960843978 + PWT Occupancy: 472869746.188 +Reading eBPF stats from ../utils/reports/2025-10-12-22-13-10-6.12.9-iommufd-no-map-contention-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores-RUN-0/ebpf_guest_stats.csv + function type count total_duration_ns mean_ns variance_us count_per_page +0 iommu_map -1 1170110 19258201417 16458.45 14.03 0.051992 +1 __iommu_map -1 1126872 4284106435 3801.77 0.14 0.050071 +2 intel_iommu_iotlb_sync_map -1 1077658 7001216264 6496.70 -2.80 0.047884 +3 cache_tag_flush_range_np -1 1036637 2659680166 2565.68 -0.02 0.046062 +4 __iommu_unmap -1 907219 3121130760 3440.33 0.06 0.040311 +5 intel_iommu_tlb_sync -1 861812 890860506358 1033706.31 3577537.57 0.038293 +6 cache_tag_flush_range -1 815331 849000933555 1041296.03 3615449.43 0.036228 +7 qi_submit_sync -1 626149 36882805549 58904.20 2335.95 0.027822 +8 qi_batch_flush_descs -1 673511 42958388261 63782.76 2504.17 0.029927 +9 trace_qi_submit_sync_cs -1 544099 29367935240 53975.35 2217.12 0.024176 +10 page_pool_put_unrefed_netmem -1 22786117 45765700956 2008.49 -1.64 1.012469 +11 page_pool_put_unrefed_page -1 1049 8903164 8487.29 -4.08 0.000047 +``` From 8f22ebe38e0a0f50de72636bf44834c2b6c44179 Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Mon, 20 Oct 2025 04:23:46 -0500 Subject: [PATCH 07/18] mystery changes siyuan made --- plots/plot_3virt.py | 72 ++++++++++-- scripts/report-tput-metrics.py | 54 ++++++++- scripts/run.sh | 12 ++ scripts/sosp24-experiments/vm_flows_exp.sh | 8 +- tracing/guest_loader.c | 2 +- utils/reports/ebpf.md | 127 --------------------- utils/reports/no_ebpf.md | 90 --------------- 7 files changed, 128 insertions(+), 237 deletions(-) create mode 100755 scripts/run.sh delete mode 100644 utils/reports/ebpf.md delete mode 100644 utils/reports/no_ebpf.md diff --git a/plots/plot_3virt.py b/plots/plot_3virt.py index 85e00a256..628243853 100644 --- a/plots/plot_3virt.py +++ b/plots/plot_3virt.py @@ -133,32 +133,82 @@ def plot_iommu_misses_stats(iommu_on_data, x_labels, title, x_label): plt.close() -def plot_bars3(host_strict_guest_off, host_strict_guest_shadow, host_strict_guest_nested, x_labels, title, xlabel, ylabel): +# def plot_bars3(host_strict_guest_off, host_strict_guest_shadow, host_strict_guest_nested, 
x_labels, title, xlabel, ylabel): +# print(host_strict_guest_off) +# print(host_strict_guest_shadow) +# print(host_strict_guest_nested) +# print(x_labels) +# print(title) +# print(xlabel) +# print(ylabel) +# bar_width = 0.3 +# gap_factor = 1.5 +# x = np.arange(len(x_labels)) * gap_factor +# plt.bar(x - bar_width, host_strict_guest_off, bar_width, label='Host IOMMU Strict; Guest IOMMU Off') +# plt.bar(x, host_strict_guest_shadow, bar_width, label='Host IOMMU Strict; Guest IOMMU Shadow') +# plt.bar(x + bar_width, host_strict_guest_nested, bar_width, label='Host IOMMU Strict; Guest IOMMU Nested') + +# plt.xlabel(xlabel) +# plt.ylabel(ylabel) +# plt.title(title) +# plt.xticks(x, x_labels) + +# plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2) +# plt.subplots_adjust(bottom=0.25) + +# plt.savefig(title + '.png', bbox_inches='tight') +# print('Saved plot to ' + title + '.png') +# plt.close() + +def plot_bars3(host_strict_guest_off, host_strict_guest_shadow, host_strict_guest_nested, x_labels, title, xlabel, ylabel): print(host_strict_guest_off) print(host_strict_guest_shadow) print(host_strict_guest_nested) print(x_labels) - print(title) - print(xlabel) - print(ylabel) - bar_width = 0.3 + print(title, xlabel, ylabel) + + plt.figure(figsize=(10, 6)) + plt.rcParams.update({'font.size': 12}) + + bar_width = 0.38 gap_factor = 1.5 x = np.arange(len(x_labels)) * gap_factor - plt.bar(x - bar_width, host_strict_guest_off, bar_width, label='Host IOMMU Strict; Guest IOMMU Off') - plt.bar(x, host_strict_guest_shadow, bar_width, label='Host IOMMU Strict; Guest IOMMU Shadow') - plt.bar(x + bar_width, host_strict_guest_nested, bar_width, label='Host IOMMU Strict; Guest IOMMU Nested') + + colors = ['#0072B2', '#E69F00', '#009E73'] + + bars_off = plt.bar(x - bar_width, host_strict_guest_off, bar_width, + color=colors[0], label='Host Strict; Guest Off') + bars_shadow = plt.bar(x, host_strict_guest_shadow, bar_width, + color=colors[1], label='Host Strict; Guest Shadow') + bars_nested = plt.bar(x + bar_width, host_strict_guest_nested, bar_width, + color=colors[2], label='Host Strict; Guest Nested') plt.xlabel(xlabel) plt.ylabel(ylabel) plt.title(title) plt.xticks(x, x_labels) - + plt.grid(axis='y', linestyle='--', alpha=0.7) plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=2) - plt.subplots_adjust(bottom=0.25) + + def add_labels(bars): + for bar in bars: + height = bar.get_height() + plt.text( + bar.get_x() + bar.get_width() / 2, + height + (0.02 * max(host_strict_guest_off + host_strict_guest_shadow + host_strict_guest_nested)), + f"{height:.1f}", + ha='center', va='bottom', fontsize=12 + ) + + add_labels(bars_off) + add_labels(bars_shadow) + add_labels(bars_nested) + + plt.tight_layout(rect=[0, 0, 1, 0.95]) plt.savefig(title + '.png', bbox_inches='tight') - print('Saved plot to ' + title + '.png') + print(f'Saved plot to {title}.png') plt.close() def plot_all_subplots(host_strict_guest_off_data, host_strict_guest_shadow_data, host_strict_guest_nested_data, x_labels, title_key, xlabel): diff --git a/scripts/report-tput-metrics.py b/scripts/report-tput-metrics.py index 58e369f38..a54eacd89 100644 --- a/scripts/report-tput-metrics.py +++ b/scripts/report-tput-metrics.py @@ -30,9 +30,55 @@ def __get_ebpf_stats(exp_name, run_id): if not os.path.exists(ebpf_path): return None print(f"Reading eBPF stats from {ebpf_path}") - ebpf_results = pd.read_csv(ebpf_path) - # only show aggragete results - ebpf_results = ebpf_results[ebpf_results['core'] == -1] + +# # Per-Function Latency 
Statistics +# function,type,count,total_duration_ns,mean_ns,variance_us +# iommu_map,-1,171036,1139456425,6662.09,-4.24 +# __iommu_map,-1,170998,185684984,1085.89,-0.69 +# intel_iommu_iotlb_sync_map,-1,170922,686091943,4014.06,-3.05 +# cache_tag_flush_range_np,-1,170866,498774037,2919.09,-2.03 +# iommu_flush_write_buffer,-1,170811,118642694,694.58,-0.26 +# __iommu_unmap,-1,170698,186294285,1091.37,-0.74 +# intel_iommu_tlb_sync,-1,170646,2118537135,12414.81,385.33 +# cache_tag_flush_range,-1,170591,1922281964,11268.37,383.70 +# qi_submit_sync,-1,170362,1494422523,8772.04,378.01 +# qi_batch_flush_descs,-1,340833,1807166455,5302.21,210.82 +# trace_qi_submit_sync_cs,-1,170252,1294629687,7604.20,377.08 +# page_pool_put_unrefed_netmem,-1,8695550,6395061571,735.44,-0.28 +# page_pool_put_unrefed_page,-1,122,266038,2180.64,-1.07 +# # Per-Function Per-CPU Counts + + # Read the ebpf_guest_stats.csv file and extract the lines between: + # "# Per-Function Latency Statistics" and "# Per-Function Per-CPU Counts" + # Return as a pandas DataFrame with columns: function, type, count, total_duration_ns, mean_ns, variance_us + + with open(ebpf_path, 'r') as f: + lines = f.readlines() + + start_idx = None + end_idx = None + for i, line in enumerate(lines): + if line.strip().startswith("# Per-Function Latency Statistics"): + start_idx = i + 1 + if line.strip().startswith("# Per-Function Per-CPU Counts"): + end_idx = i + break + + if start_idx is None or end_idx is None or end_idx <= start_idx: + return None + + # The first line after start_idx is the header + header = lines[start_idx].strip() + data_lines = [l.strip() for l in lines[start_idx+1:end_idx] if l.strip() and not l.strip().startswith("#")] + + from io import StringIO + csv_content = header + "\n" + "\n".join(data_lines) + ebpf_results = pd.read_csv(StringIO(csv_content)) + + # print(ebpf_results.to_string()) + # ebpf_results = pd.read_csv(ebpf_path) + # # only show aggragete results + # ebpf_results = ebpf_results[ebpf_results['core'] == -1] # return {key: ebpf_results[key] for key in ebpf_results.dtype.names} return ebpf_results @@ -46,7 +92,7 @@ def get_ebpf_stats(exp_name, tput, profile_duration=20): continue total_data = tput * 1e9 / 8 * profile_duration # bytes total_pages = total_data / 4096 - run_stats['count_per_page'] = run_stats['count'] / total_pages + run_stats['count_per_page'] = run_stats['cpu'] / total_pages run_stats = run_stats.reset_index(drop=True) print(run_stats.to_string()) diff --git a/scripts/run.sh b/scripts/run.sh new file mode 100755 index 000000000..27bb6f4c9 --- /dev/null +++ b/scripts/run.sh @@ -0,0 +1,12 @@ +#!/bin/bash +python3 report-tput-metrics.py +exps=( + "2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores" + "2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores" + "2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores" + "2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores" +) + +for exp in "${exps[@]}"; do + python3 report-tput-metrics.py $exp tput,drops,acks,iommu,cpu | tee ../utils/reports/$exp_name/summary.txt +done diff --git a/scripts/sosp24-experiments/vm_flows_exp.sh b/scripts/sosp24-experiments/vm_flows_exp.sh index 80ceb43df..35ded0b40 100755 --- a/scripts/sosp24-experiments/vm_flows_exp.sh +++ b/scripts/sosp24-experiments/vm_flows_exp.sh @@ -97,10 +97,10 @@ 
timestamp=$(date '+%Y-%m-%d-%H-%M-%S') for socket_buf in 1; do for ring_buffer in 512; do # 5 10 20 40 - for i in 1 4 8 20; do - num_cores=$i - client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) - server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) + for i in 1 4 8 16 20; do + num_cores=$i + client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) + server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) format_i=$(printf "%02d\n" $i) exp_name="${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-${ring_buffer}_sokcetbuf${socket_buf}_${num_cores}cores" diff --git a/tracing/guest_loader.c b/tracing/guest_loader.c index 63216fa02..2a9df6950 100644 --- a/tracing/guest_loader.c +++ b/tracing/guest_loader.c @@ -275,7 +275,7 @@ static void dump_aggregate_to_file(FILE *fp, struct guest_tracer_bpf *skel) for (int cpu = 0; cpu < num_cpus; cpu++) { struct latency_stats_t *s = &percpu_stats[cpu]; - if (s->count == 0) continue; + // if (s->count == 0) continue; fprintf(fp, "%s,%d,%llu,%llu,%.2f,%.2f\n", fn_name, diff --git a/utils/reports/ebpf.md b/utils/reports/ebpf.md deleted file mode 100644 index 82edc7caa..000000000 --- a/utils/reports/ebpf.md +++ /dev/null @@ -1,127 +0,0 @@ -Usage: python script.py - -1 flow & core -``` -------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores Run Metrics ------- -Throughput: 46.913 -CPU Util: 100.0 -Drop rate: 6.9598e-06 -Acks per page: 0.011157171172169762 -Per page stats: - IOTLB Miss: 1.0182934255241831 - IOTLB First Lookup: 9.992788985910089 - IOTLB All Lookups: 17.35123975836122 - IOTLB Inv: 0.07360253473450856 - IOMMU Mem Access: 1.9066415944407733 - PWT Occupancy: 221906394.188 -Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores-RUN-0/ebpf_guest_stats.csv - function type count total_duration_ns mean_ns variance_us count_per_page -0 iommu_map -1 171036 1139456425 6662.09 -4.24 0.005973 -1 __iommu_map -1 170998 185684984 1085.89 -0.69 0.005972 -2 intel_iommu_iotlb_sync_map -1 170922 686091943 4014.06 -3.05 0.005969 -3 cache_tag_flush_range_np -1 170866 498774037 2919.09 -2.03 0.005967 -4 iommu_flush_write_buffer -1 170811 118642694 694.58 -0.26 0.005965 -5 __iommu_unmap -1 170698 186294285 1091.37 -0.74 0.005961 -6 intel_iommu_tlb_sync -1 170646 2118537135 12414.81 385.33 0.005960 -7 cache_tag_flush_range -1 170591 1922281964 11268.37 383.70 0.005958 -8 qi_submit_sync -1 170362 1494422523 8772.04 378.01 0.005950 -9 qi_batch_flush_descs -1 340833 1807166455 5302.21 210.82 0.011903 -10 trace_qi_submit_sync_cs -1 170252 1294629687 7604.20 377.08 0.005946 -11 page_pool_put_unrefed_netmem -1 8695550 6395061571 735.44 -0.28 0.303685 -12 page_pool_put_unrefed_page -1 122 266038 2180.64 -1.07 0.000004 - -``` - -4 flow & core -``` -------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores Run Metrics ------- -Throughput: 166.568 -CPU Util: 100.0 -Drop rate: 9.976e-07 -Acks per page: 0.013932173862926852 -Per page stats: - IOTLB Miss: 1.14558250952327 - IOTLB First Lookup: 9.777568002791028 - IOTLB All Lookups: 18.448538498145528 - IOTLB Inv: 0.09115335522866337 - IOMMU Mem Access: 2.3182612214913787 - PWT Occupancy: 971250965.412 -Reading eBPF stats from 
../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores-RUN-0/ebpf_guest_stats.csv - function type count total_duration_ns mean_ns variance_us count_per_page -0 iommu_map -1 648739 5528942156 8522.60 296.42 0.006381 -1 __iommu_map -1 648502 641657150 989.45 -0.60 0.006379 -2 intel_iommu_iotlb_sync_map -1 648270 3902221495 6019.44 295.04 0.006377 -3 cache_tag_flush_range_np -1 648045 3295997260 5086.06 295.88 0.006374 -4 iommu_flush_write_buffer -1 647803 333835119 515.33 -0.23 0.006372 -5 __iommu_unmap -1 647336 633511789 978.64 -0.54 0.006367 -6 intel_iommu_tlb_sync -1 647106 9527337524 14722.99 202.38 0.006365 -7 cache_tag_flush_range -1 646879 8864704505 13703.81 200.82 0.006363 -8 qi_submit_sync -1 645893 6084550333 9420.37 122.24 0.006353 -9 qi_batch_flush_descs -1 1292238 7132626300 5519.59 86.35 0.012711 -10 trace_qi_submit_sync_cs -1 645454 5362290481 8307.78 121.02 0.006349 -11 page_pool_put_unrefed_netmem -1 37297328 20335614809 545.23 -0.21 0.366865 -12 page_pool_put_unrefed_page -1 775 1347659 1738.91 -1.12 0.000008 -``` - -8 flow & core -``` -------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- -Throughput: 159.404 -CPU Util: 99.8865 -Drop rate: 2.9706e-06 -Acks per page: 0.014431629178690623 -Per page stats: - IOTLB Miss: 1.1012391911871722 - IOTLB First Lookup: 9.37129735625204 - IOTLB All Lookups: 16.570158605605883 - IOTLB Inv: 0.11572329316463828 - IOMMU Mem Access: 2.2484629998745325 - PWT Occupancy: 894057325.062 -Reading eBPF stats from ../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores-RUN-0/ebpf_guest_stats.csv - function type count total_duration_ns mean_ns variance_us count_per_page -0 iommu_map -1 1788292 6183041804167 3.457512e+06 5.106689e+11 0.018381 -1 __iommu_map -1 1752395 2227055514 1.270860e+03 -5.300000e-01 0.018012 -2 intel_iommu_iotlb_sync_map -1 1707617 27438328576 1.606820e+04 4.706800e+02 0.017551 -3 cache_tag_flush_range_np -1 1705916 25564375458 1.498572e+04 4.691400e+02 0.017534 -4 iommu_flush_write_buffer -1 1678465 1491011350 8.883200e+02 1.700000e-01 0.017252 -5 __iommu_unmap -1 1627399 2557927152 1.571790e+03 -5.100000e-01 0.016727 -6 intel_iommu_tlb_sync -1 1570730 35766444256 2.277059e+04 2.908800e+02 0.016144 -7 cache_tag_flush_range -1 1570406 33464521935 2.130947e+04 2.929400e+02 0.016141 -8 qi_submit_sync -1 1453295 12498719171 8.600260e+03 6.011000e+01 0.014937 -9 qi_batch_flush_descs -1 2971101 15873610964 5.342670e+03 5.162000e+01 0.030538 -10 trace_qi_submit_sync_cs -1 1392530 10196588233 7.322350e+03 6.028000e+01 0.014313 -11 page_pool_put_unrefed_netmem -1 56118048 57621406170 1.026790e+03 1.900000e-01 0.576797 -12 page_pool_put_unrefed_page -1 64079 246231280081725 3.842621e+09 3.379906e+13 0.000659 -``` - -20 flow & core -``` -------- 2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- -Throughput: 18.0 -CPU Util: 99.99735 -Drop rate: 0.0105981691 -Acks per page: 0.02008469048888889 -Per page stats: - IOTLB Miss: 1.2731322131342222 - IOTLB First Lookup: 10.490504613432888 - IOTLB All Lookups: 18.781935896348443 - IOTLB Inv: 0.10933835639466667 - IOMMU Mem Access: 2.3686680758044445 - PWT Occupancy: 214887392.647 -Reading eBPF stats from 
../utils/reports/2025-10-03-16-07-35-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores-RUN-0/ebpf_guest_stats.csv - function type count total_duration_ns mean_ns variance_us count_per_page -0 iommu_map -1 657423 811346922685 1234132.24 4078555.49 0.059840 -1 __iommu_map -1 629085 2667726698 4240.65 -0.39 0.057261 -2 intel_iommu_iotlb_sync_map -1 604227 752131135238 1244782.40 4167118.65 0.054998 -3 cache_tag_flush_range_np -1 580133 724976161891 1249672.34 4207074.99 0.052805 -4 iommu_flush_write_buffer -1 558943 2076529419 3715.10 3.58 0.050876 -5 __iommu_unmap -1 499915 2036478252 4073.65 -0.81 0.045503 -6 intel_iommu_tlb_sync -1 473102 360266895752 761499.41 2695061.66 0.043063 -7 cache_tag_flush_range -1 450158 344191430438 764601.39 2727268.43 0.040974 -8 qi_submit_sync -1 359073 27308442500 76052.62 2515.41 0.032684 -9 qi_batch_flush_descs -1 752523 31516145928 41880.64 2880.87 0.068496 -10 trace_qi_submit_sync_cs -1 318545 22430274841 70414.78 2367.43 0.028995 -11 page_pool_put_unrefed_netmem -1 12757594 26657209578 2089.52 -1.24 1.161225 -12 page_pool_put_unrefed_page -1 1239 11756313 9488.55 -1.85 0.000113 -``` - diff --git a/utils/reports/no_ebpf.md b/utils/reports/no_ebpf.md deleted file mode 100644 index 6cfbcc13c..000000000 --- a/utils/reports/no_ebpf.md +++ /dev/null @@ -1,90 +0,0 @@ - -Nested, no ebpf tracing -1 flow, 1 core -``` -2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores -------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow01-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_1cores Run Metrics ------- -Throughput: 43.481 -CPU Util: 72.67 -Drop rate: 1.25985e-05 -Acks per page: 0.018491149476782962 -Per page stats: - IOTLB Miss: 0.9459983733393436 - IOTLB First Lookup: 10.262835799972954 - IOTLB All Lookups: 16.873251969221684 - IOTLB Inv: 0.10161880412759597 - IOMMU Mem Access: 1.9011407753588005 - PWT Occupancy: 205601686.882 -``` - -4 flow, 4 core - -``` -2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores -------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow04-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_4cores Run Metrics ------- -Throughput: 165.123 -CPU Util: 93.451375 -Drop rate: 9.771e-07 -Acks per page: 0.015494842821411916 -Per page stats: - IOTLB Miss: 1.1755971450774514 - IOTLB First Lookup: 9.868652860893468 - IOTLB All Lookups: 18.487392099760978 - IOTLB Inv: 0.09163031913053905 - IOMMU Mem Access: 2.360826030421637 - PWT Occupancy: 955914720.765 -``` - -8 flow, 8 core - -``` -2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores -------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow08-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_8cores Run Metrics ------- -Throughput: 229.186 -CPU Util: 99.2229375 -Drop rate: 2.0139e-06 -Acks per page: 0.017781929381375825 -Per page stats: - IOTLB Miss: 1.125802439877619 - IOTLB First Lookup: 9.977624758025778 - IOTLB All Lookups: 18.427204986125716 - IOTLB Inv: 0.11165543759608353 - IOMMU Mem Access: 2.386134777764924 - PWT Occupancy: 1371796173.0 -``` - -16 flow, 16 core - -``` -2025-10-03-17-24-12-6.12.9-iommufd-extra-hooks-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores -------- 2025-10-03-17-24-12-6.12.9-iommufd-extra-hooks-flow16-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_16cores Run Metrics ------- -Throughput: 25.29 -CPU Util: 
99.99375 -Drop rate: 0.0018222967 -Acks per page: 0.01729804450771056 -Per page stats: - IOTLB Miss: 1.1352454320132859 - IOTLB First Lookup: 10.149211487860814 - IOTLB All Lookups: 19.087077295991143 - IOTLB Inv: 0.10629719191838673 - IOMMU Mem Access: 2.340058162450929 - PWT Occupancy: 314648489.688 -``` - -20 flows 20 cores -``` -2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores -------- 2025-10-03-16-32-21-6.12.9-iommufd-extra-hooks-flow20-host-strict-guest-on-nested-ringbuf-512_sokcetbuf1_20cores Run Metrics ------- -Throughput: 25.325 -CPU Util: 100.0 -Drop rate: 0.0049857471 -Acks per page: 0.017037289761105627 -Per page stats: - IOTLB Miss: 1.1891126457189338 - IOTLB First Lookup: 10.070045222068943 - IOTLB All Lookups: 19.148399357017965 - IOTLB Inv: 0.10229347197599209 - IOMMU Mem Access: 2.3235780622556366 - PWT Occupancy: 299483021.235 -``` - From 84ab50910feee1dcb22f7970bc271f22ad0944bd Mon Sep 17 00:00:00 2001 From: Leshna Balara Date: Mon, 20 Oct 2025 04:26:42 -0500 Subject: [PATCH 08/18] add ioctl call tracing --- tracing/host_aggregate.csv | 9 --------- tracing/host_qemu_loader.c | 36 +++++++++++++++++++++------------- tracing/host_qemu_tracer.bpf.c | 25 +++++++++++++++++++++++ tracing/tracing_utils.h | 1 + 4 files changed, 48 insertions(+), 23 deletions(-) delete mode 100644 tracing/host_aggregate.csv diff --git a/tracing/host_aggregate.csv b/tracing/host_aggregate.csv deleted file mode 100644 index 5611d58de..000000000 --- a/tracing/host_aggregate.csv +++ /dev/null @@ -1,9 +0,0 @@ -function,count,total_duration_ns,mean_ns,variance_us -iommu_map,187,885760,4736.68,1.00 -__iommu_map,3077,1804671,586.50,2.37 -intel_iommu_iotlb_sync_map,756,674235,891.85,6.98 -__iommu_unmap,629,517110,822.11,-0.22 -intel_iommu_tlb_sync,627,1296286,2067.44,-1.53 -qemu:address_space_rw,123,1098054,8927.27,13.23 -qemu:vtd_mem_write,6,320560,53426.67,-31.08 -qemu:vtd_iommu_translate,9,155994,17332.67,230.47 diff --git a/tracing/host_qemu_loader.c b/tracing/host_qemu_loader.c index 3694787ff..1da3c6133 100644 --- a/tracing/host_qemu_loader.c +++ b/tracing/host_qemu_loader.c @@ -96,20 +96,24 @@ typedef struct } probe_def_t; probe_def_t probes_to_attach[] = { - {"kprobe_iommu_map", "iommu_map", PROBE_TYPE_KPROBE, IOMMU_MAP, false}, - {"kretprobe_iommu_map", "iommu_map", PROBE_TYPE_KRETPROBE, IOMMU_MAP, false}, - {"kprobe___iommu_map", "__iommu_map", PROBE_TYPE_KPROBE, IOMMU_MAP_INTERNAL, false}, - {"kretprobe___iommu_map", "__iommu_map", PROBE_TYPE_KRETPROBE, IOMMU_MAP_INTERNAL, false}, - {"kprobe_intel_iommu_iotlb_sync_map", "intel_iommu_iotlb_sync_map", PROBE_TYPE_KPROBE, IOMMU_IOTLB_SYNC_MAP, false}, - {"kretprobe_intel_iommu_iotlb_sync_map", "intel_iommu_iotlb_sync_map", PROBE_TYPE_KRETPROBE, IOMMU_IOTLB_SYNC_MAP, false}, - {"kprobe_iommu_unmap", "iommu_unmap", PROBE_TYPE_KPROBE, IOMMU_UNMAP, false}, - {"kretprobe_iommu_unmap", "iommu_unmap", PROBE_TYPE_KRETPROBE, IOMMU_UNMAP, false}, - {"kprobe___iommu_unmap", "__iommu_unmap", PROBE_TYPE_KPROBE, IOMMU_UNMAP_INTERNAL, false}, - {"kretprobe___iommu_unmap", "__iommu_unmap", PROBE_TYPE_KRETPROBE, IOMMU_UNMAP_INTERNAL, false}, - {"kprobe_intel_iommu_tlb_sync", "intel_iommu_tlb_sync", PROBE_TYPE_KPROBE, IOMMU_TLB_SYNC, false}, - {"kretprobe_intel_iommu_tlb_sync", "intel_iommu_tlb_sync", PROBE_TYPE_KRETPROBE, IOMMU_TLB_SYNC, false}, - {"uprobe_vtd_fetch_inv_desc", "vtd_fetch_inv_desc", PROBE_TYPE_UPROBE, QEMU_VTD_FETCH_INV_DESC, true}, - {"uretprobe_vtd_fetch_inv_desc", 
"vtd_fetch_inv_desc", PROBE_TYPE_URETPROBE, QEMU_VTD_FETCH_INV_DESC, true}, + // {"kprobe_iommu_map", "iommu_map", PROBE_TYPE_KPROBE, IOMMU_MAP, false}, + // {"kretprobe_iommu_map", "iommu_map", PROBE_TYPE_KRETPROBE, IOMMU_MAP, false}, + // {"kprobe___iommu_map", "__iommu_map", PROBE_TYPE_KPROBE, IOMMU_MAP_INTERNAL, false}, + // {"kretprobe___iommu_map", "__iommu_map", PROBE_TYPE_KRETPROBE, IOMMU_MAP_INTERNAL, false}, + // {"kprobe_intel_iommu_iotlb_sync_map", "intel_iommu_iotlb_sync_map", PROBE_TYPE_KPROBE, IOMMU_IOTLB_SYNC_MAP, false}, + // {"kretprobe_intel_iommu_iotlb_sync_map", "intel_iommu_iotlb_sync_map", PROBE_TYPE_KRETPROBE, IOMMU_IOTLB_SYNC_MAP, false}, + // {"kprobe_iommu_unmap", "iommu_unmap", PROBE_TYPE_KPROBE, IOMMU_UNMAP, false}, + // {"kretprobe_iommu_unmap", "iommu_unmap", PROBE_TYPE_KRETPROBE, IOMMU_UNMAP, false}, + // {"kprobe___iommu_unmap", "__iommu_unmap", PROBE_TYPE_KPROBE, IOMMU_UNMAP_INTERNAL, false}, + // {"kretprobe___iommu_unmap", "__iommu_unmap", PROBE_TYPE_KRETPROBE, IOMMU_UNMAP_INTERNAL, false}, + // {"kprobe_intel_iommu_tlb_sync", "intel_iommu_tlb_sync", PROBE_TYPE_KPROBE, IOMMU_TLB_SYNC, false}, + // {"kretprobe_intel_iommu_tlb_sync", "intel_iommu_tlb_sync", PROBE_TYPE_KRETPROBE, IOMMU_TLB_SYNC, false}, + // {"uprobe_vtd_fetch_inv_desc", "vtd_fetch_inv_desc", PROBE_TYPE_UPROBE, QEMU_VTD_FETCH_INV_DESC, true}, + // {"uretprobe_vtd_fetch_inv_desc", "vtd_fetch_inv_desc", PROBE_TYPE_URETPROBE, QEMU_VTD_FETCH_INV_DESC, true}, + {"kprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KPROBE, QI_SUBMIT_SYNC }, + {"kretprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KRETPROBE, QI_SUBMIT_SYNC}, + {"kprobe_iommufd_fops_ioctl", "iommufd_fops_ioctl", PROBE_TYPE_KPROBE, IOMMUFD_FOPS_IOCTL }, + {"kretprobe_iommufd_fops_ioctl", "iommufd_fops_ioctl", PROBE_TYPE_KRETPROBE, IOMMUFD_FOPS_IOCTL}, // {"uprobe_address_space_rw", "address_space_rw", PROBE_TYPE_UPROBE, QEMU_ADDRESS_SPACE_RW, true}, // {"uretprobe_address_space_rw", "address_space_rw", PROBE_TYPE_URETPROBE, QEMU_ADDRESS_SPACE_RW, true}, // {"uprobe_address_space_write", "address_space_write", PROBE_TYPE_UPROBE, QEMU_ADDRESS_SPACE_WRITE, true}, @@ -151,6 +155,10 @@ const char *func_name_to_string(enum FunctionName fn) return "__iommu_unmap"; case IOMMU_TLB_SYNC: return "intel_iommu_tlb_sync"; + case QI_SUBMIT_SYNC: + return "qi_submit_sync"; + case IOMMUFD_FOPS_IOCTL: + return "iommufd_fops_ioctl"; default: return "UnknownFunction"; } diff --git a/tracing/host_qemu_tracer.bpf.c b/tracing/host_qemu_tracer.bpf.c index dd7b5c4a4..21cf2cacb 100644 --- a/tracing/host_qemu_tracer.bpf.c +++ b/tracing/host_qemu_tracer.bpf.c @@ -146,6 +146,31 @@ int BPF_KRETPROBE(kretprobe_intel_iommu_tlb_sync) return _bpf_utils_trace_func_exit(ctx, HOST, false); } +SEC("kretprobe/qi_submit_sync") +int BPF_KRETPROBE(kprobe_qi_submit_sync, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/qi_submit_sync") +int BPF_KRETPROBE(kretprobe_qi_submit_sync, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, HOST, false); +} + +// iommufd_fops_ioctl +SEC("kretprobe/iommufd_fops_ioctl") +int BPF_KRETPROBE(kprobe_iommufd_fops_ioctl, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/iommufd_fops_ioctl") +int BPF_KRETPROBE(kretprobe_iommufd_fops_ioctl, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, HOST, false); +} + SEC("uprobe//home/lbalara/viommu/qemu-nested/build/qemu-system-x86_64:vtd_fetch_inv_desc") int BPF_UPROBE(uprobe_vtd_fetch_inv_desc, void *s) { diff --git 
a/tracing/tracing_utils.h b/tracing/tracing_utils.h index e4a7c622d..40392b4a2 100644 --- a/tracing/tracing_utils.h +++ b/tracing/tracing_utils.h @@ -52,6 +52,7 @@ enum FunctionName PAGE_POOL_PUT_NETMEM, PAGE_POOL_PUT_PAGE, QEMU_VTD_FETCH_INV_DESC, + IOMMUFD_FOPS_IOCTL, TRACE_FUNCS_END, // Marks the end of trace functions // --- Section for Simple Frequency Counting --- From de20c68ded082cd765a171dd76cf66a85d247749 Mon Sep 17 00:00:00 2001 From: Leshna Balara Date: Mon, 20 Oct 2025 04:57:33 -0500 Subject: [PATCH 09/18] add probes to modules --- tracing/guest_aggregate.csv | 0 tracing/guest_loader.c | 28 ++++++++++++++-------------- 2 files changed, 14 insertions(+), 14 deletions(-) delete mode 100644 tracing/guest_aggregate.csv diff --git a/tracing/guest_aggregate.csv b/tracing/guest_aggregate.csv deleted file mode 100644 index e69de29bb..000000000 diff --git a/tracing/guest_loader.c b/tracing/guest_loader.c index 2a9df6950..a5ae95e93 100644 --- a/tracing/guest_loader.c +++ b/tracing/guest_loader.c @@ -16,7 +16,7 @@ #include "tracing_utils.h" #define PERF_BUFFER_PAGES 64 -#define MAX_PROBES 50 +#define MAX_PROBES 127 #define MAX_STACK_DEPTH 127 static volatile bool exiting = false; @@ -126,16 +126,16 @@ probe_def_t probes_to_attach[] = { {"kretprobe_qi_submit_sync", "qi_submit_sync", PROBE_TYPE_KRETPROBE, QI_SUBMIT_SYNC,NULL}, {"kprobe_page_pool_dma_map", "page_pool_dma_map", PROBE_TYPE_KPROBE, PAGE_POOL_DMA_MAP,NULL}, {"kretprobe_page_pool_dma_map", "page_pool_dma_map", PROBE_TYPE_KRETPROBE, PAGE_POOL_DMA_MAP,NULL}, - // {"kprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, - // {"kretprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, - // {"kprobe_trace_mlx5e_dma_push_build_single_hook", "trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, - // {"kretprobe_trace_mlx5e_dma_push_build_single_hook", "trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, - // {"kprobe_trace_mlx5e_dma_push_xmit_single_hook", "trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, - // {"kretprobe_trace_mlx5e_dma_push_xmit_single_hook", "trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, - // {"kprobe_trace_mlx5e_dma_push_page_hook", "trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, - // {"kretprobe_trace_mlx5e_dma_push_page_hook", "trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, - // {"kprobe_trace_mlx5e_tx_dma_unmap_hook", "trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, - // {"kretprobe_trace_mlx5e_tx_dma_unmap_hook", "trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "mlx5_core:trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_tx_dma_unmap_ktls_hook", "mlx5_core:trace_mlx5e_tx_dma_unmap_ktls_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_KTLS_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_dma_push_build_single_hook", 
"mlx5_core:trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_dma_push_build_single_hook", "mlx5_core:trace_mlx5e_dma_push_build_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_BUILD_SINGLE_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_dma_push_xmit_single_hook", "mlx5_core:trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_dma_push_xmit_single_hook", "mlx5_core:trace_mlx5e_dma_push_xmit_single_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_XMIT_SINGLE_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_dma_push_page_hook", "mlx5_core:trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_dma_push_page_hook", "mlx5_core:trace_mlx5e_dma_push_page_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_DMA_PUSH_PAGE_HOOK,"mlx5_core"}, + {"kprobe_trace_mlx5e_tx_dma_unmap_hook", "mlx5_core:trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, + {"kretprobe_trace_mlx5e_tx_dma_unmap_hook", "mlx5_core:trace_mlx5e_tx_dma_unmap_hook", PROBE_TYPE_KRETPROBE, TRACE_MLX5E_TX_DMA_UNMAP_HOOK,"mlx5_core"}, {"kprobe_trace_qi_submit_sync_cs", "trace_qi_submit_sync_cs", PROBE_TYPE_KPROBE, TRACE_QI_SUBMIT_SYNC_CS,NULL}, {"kretprobe_trace_qi_submit_sync_cs", "trace_qi_submit_sync_cs", PROBE_TYPE_KRETPROBE, TRACE_QI_SUBMIT_SYNC_CS,NULL}, {"kprobe_trace_qi_submit_sync_lock_wrapper", "trace_qi_submit_sync_lock_wrapper", PROBE_TYPE_KPROBE, TRACE_QI_SUBMIT_SYNC_LOCK_WRAPPER,NULL}, @@ -151,8 +151,8 @@ probe_def_t probes_to_attach[] = { {"kprobe_page_pool_put_unrefed_page", "page_pool_put_unrefed_page", PROBE_TYPE_KPROBE, PAGE_POOL_PUT_PAGE,NULL}, {"kretprobe_page_pool_put_unrefed_page", "page_pool_put_unrefed_page", PROBE_TYPE_KRETPROBE, PAGE_POOL_PUT_PAGE,NULL}, // --- Additions for count functions --- - // {"kprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, - // {"kretprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KRETPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, + {"kprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "mlx5_core:count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, + {"kretprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "mlx5_core:count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KRETPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, {"kprobe_count_page_pool_release_page_dma_hook", "count_page_pool_release_page_dma_hook", PROBE_TYPE_KPROBE, COUNT_PAGE_POOL_RELEASE, NULL}, {"kretprobe_count_page_pool_release_page_dma_hook", "count_page_pool_release_page_dma_hook", PROBE_TYPE_KRETPROBE, COUNT_PAGE_POOL_RELEASE, NULL}, {"kprobe_count_page_pool_recycle_in_cache_hook", "count_page_pool_recycle_in_cache_hook", PROBE_TYPE_KPROBE, COUNT_PAGE_POOL_RECYCLE, NULL}, @@ -275,7 +275,7 @@ static void dump_aggregate_to_file(FILE *fp, struct guest_tracer_bpf *skel) for (int cpu = 0; cpu < num_cpus; cpu++) { struct latency_stats_t *s = &percpu_stats[cpu]; - // if (s->count == 0) continue; + if (s->count == 0) continue; fprintf(fp, "%s,%d,%llu,%llu,%.2f,%.2f\n", fn_name, From 1147eab593cf96764bd539d4d57d82cd1d29e09a Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Tue, 21 Oct 2025 09:33:54 -0500 Subject: [PATCH 10/18] fix cores instead of 
using a random set and record vm exits --- scripts/report-tput-metrics.py | 2 +- scripts/sosp24-experiments/vm_flows_exp.sh | 52 +++++++++++++++++++--- scripts/vm-run-dctcp-tput-experiment.sh | 25 ++++++++--- 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/scripts/report-tput-metrics.py b/scripts/report-tput-metrics.py index a54eacd89..4ef862432 100644 --- a/scripts/report-tput-metrics.py +++ b/scripts/report-tput-metrics.py @@ -92,7 +92,7 @@ def get_ebpf_stats(exp_name, tput, profile_duration=20): continue total_data = tput * 1e9 / 8 * profile_duration # bytes total_pages = total_data / 4096 - run_stats['count_per_page'] = run_stats['cpu'] / total_pages + run_stats['count_per_page'] = run_stats['count'] / total_pages run_stats = run_stats.reset_index(drop=True) print(run_stats.to_string()) diff --git a/scripts/sosp24-experiments/vm_flows_exp.sh b/scripts/sosp24-experiments/vm_flows_exp.sh index 35ded0b40..3605c89c5 100755 --- a/scripts/sosp24-experiments/vm_flows_exp.sh +++ b/scripts/sosp24-experiments/vm_flows_exp.sh @@ -97,13 +97,13 @@ timestamp=$(date '+%Y-%m-%d-%H-%M-%S') for socket_buf in 1; do for ring_buffer in 512; do # 5 10 20 40 - for i in 1 4 8 16 20; do + for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32; do num_cores=$i - client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) - server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) + client_cores_mask=($(echo $client_cores | tr ',' '\n' | head -n $num_cores | tr '\n' ',')) + server_cores_mask=($(echo $server_cores | tr ',' '\n' | head -n $num_cores | tr '\n' ',')) format_i=$(printf "%02d\n" $i) - exp_name="${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-${ring_buffer}_sokcetbuf${socket_buf}_${num_cores}cores" + exp_name="static-${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-${ring_buffer}_sokcetbuf${socket_buf}_${num_cores}cores" echo $exp_name if [ "$DRY_RUN" -eq 1 ]; then @@ -116,7 +116,7 @@ for socket_buf in 1; do --host-home "$HOST_HOME" --host-ip "$HOST_IP" \ --client-ssh-name "$CLIENT_SSH_UNAME" --client-ssh-pass "$CLIENT_SSH_PASSWORD" --client-ssh-host "$CLIENT_SSH_HOST" --client-ssh-use-pass "$CLIENT_USE_PASS_AUTH" --client-ssh-ifile "$CLIENT_SSH_IDENTITY_FILE" \ -e "$exp_name" -m 4000 -r $ring_buffer -b "100g" -d 1\ - --socket-buf $socket_buf --mlc-cores 'none' --runs 1 + --socket-buf $socket_buf --mlc-cores 'none' --runs 3 # > /dev/null 2>&1 #sudo bash run-dctcp-tput-experiment.sh -E $exp_name -M 4000 --num_servers $i --num_clients $i -c "4" -m "20" --ring_buffer 256 --buf 1 --mlc_cores 'none' --bandwidth "100g" --server_intf $server_intf --client_intf $client_intf @@ -134,4 +134,44 @@ for socket_buf in 1; do # --log_file "iova.log" done done -done +done + +# Temporary loop to measure impact of core randomization +for random in 1 2 3; do + # 5 10 20 40 + for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32; do + num_cores=$i + client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) + server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) + + format_i=$(printf "%02d\n" $i) + exp_name="rand-${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-512_sokcetbuf1_${num_cores}cores" + echo $exp_name + + if [ "$DRY_RUN" -eq 1 ]; then + continue + fi + + sudo bash vm-run-dctcp-tput-experiment.sh \ + --guest-home "$GUEST_HOME" --guest-ip 
"$GUEST_IP" --guest-intf "$GUEST_INTF" --guest-bus "$GUEST_NIC_BUS" -n "$i" -c $server_cores_mask \ + --client-home "$CLIENT_HOME" --client-ip "$CLIENT_IP" --client-intf "$CLIENT_INTF" -N "$i" -C $client_cores_mask \ + --host-home "$HOST_HOME" --host-ip "$HOST_IP" \ + --client-ssh-name "$CLIENT_SSH_UNAME" --client-ssh-pass "$CLIENT_SSH_PASSWORD" --client-ssh-host "$CLIENT_SSH_HOST" --client-ssh-use-pass "$CLIENT_USE_PASS_AUTH" --client-ssh-ifile "$CLIENT_SSH_IDENTITY_FILE" \ + -e "$exp_name" -m 4000 -r 512 -b "100g" -d 1\ + --socket-buf 1 --mlc-cores 'none' --runs 1 + + # > /dev/null 2>&1 + #sudo bash run-dctcp-tput-experiment.sh -E $exp_name -M 4000 --num_servers $i --num_clients $i -c "4" -m "20" --ring_buffer 256 --buf 1 --mlc_cores 'none' --bandwidth "100g" --server_intf $server_intf --client_intf $client_intf + python3 report-tput-metrics.py $exp_name tput,drops,acks,iommu,cpu | sudo tee ../utils/reports/$exp_name/summary.txt + echo $PWD + cd ../utils/reports/$exp_name + + sudo bash -c "cat /sys/kernel/debug/tracing/trace > iova.log" + cd - + sudo chmod +666 -R ../utils/reports/$exp_name + + # python sosp24-experiments/plot_iova_logging.py \ + # --exp_folder "../utils/reports/$exp_name" \ + # --log_file "iova.log" + done +done diff --git a/scripts/vm-run-dctcp-tput-experiment.sh b/scripts/vm-run-dctcp-tput-experiment.sh index d3d055d3e..3e069596a 100755 --- a/scripts/vm-run-dctcp-tput-experiment.sh +++ b/scripts/vm-run-dctcp-tput-experiment.sh @@ -13,7 +13,7 @@ GUEST_MLC_DIR_REL="mlc/Linux" FTRACE_BUFFER_SIZE_KB=20000 FTRACE_OVERWRITE_ON_FULL=0 # 0=no overwrite (tracing stops when full), 1=overwrite -PERF_TRACING_ENABLED=0 +PERF_TRACING_ENABLED=1 # --- Base Directory Paths (Relative to respective home directories) --- GUEST_FandS_REL="viommu" @@ -301,11 +301,9 @@ cleanup() { sudo echo 0 > /sys/kernel/debug/tracing/options/overwrite sudo echo 20000 > /sys/kernel/debug/tracing/buffer_size_kb - log_info "Resetting HOST ftrace..." + log_info "Resetting HOST..." $SSH_HOST_CMD \ - "sudo bash -c 'echo 0 > /sys/kernel/debug/tracing/tracing_on; \ - echo 0 > /sys/kernel/debug/tracing/options/overwrite; \ - echo 20000 > /sys/kernel/debug/tracing/buffer_size_kb'" + "cd '$HOST_SETUP_DIR'; sudo bash reset-host.sh" log_info "Resetting GUEST network interface $GUEST_INTF..." sudo ip link set "$GUEST_INTF" down @@ -337,6 +335,8 @@ for ((j = 0; j < NUM_RUNS; j += 1)); do guest_server_app_log_file="${current_guest_reports_dir}/server_app.log" guest_mlc_log_file="${current_guest_reports_dir}/mlc.log" perf_host_data_file_remote="${host_reports_dir_remote}/perf_host_cpu.data" + perf_kvm_data_file_remote="${host_reports_dir_remote}/perf_host_kvm.data" + perf_sched_data_file_remot="${host_reports_dir_remote}/perf_host_sched.data" iova_ftrace_host_output_file_remote="${host_reports_dir_remote}/iova_ftrace_host.txt" ebpf_host_stats="${host_reports_dir_remote}/ebpf_host_stats.csv" @@ -423,6 +423,17 @@ for ((j = 0; j < NUM_RUNS; j += 1)); do log_info "Starting HOST perf record (CPU profiling) on $HOST_IP..." 
host_perf_cmd="sudo '$HOST_PERF' record -F 99 -a -g --call-graph dwarf -o '$perf_host_data_file_remote' -- sleep '$PROFILING_LOGGING_DUR_S'; exec bash" $SSH_HOST_CMD "screen -dmS perf_screen sudo bash -c \"$host_perf_cmd\"" + host_perf_kvm_cmd="sudo '$HOST_PERF' kvm stat record -p \$(pidof qemu-system-x86_64 | tr ' ' ,) -o '$perf_kvm_data_file_remote'; exec bash" + $SSH_HOST_CMD "screen -dmS perf_kvm_screen sudo bash -c \"$host_perf_kvm_cmd\"" + host_perf_sched_cmd="QPID=\$(pidof qemu-system-x86_64 | tr ' ' ,); \ + TIDS=\$(ps -T -p \"\$QPID\" -o tid=,comm= | awk '/CPU .*KVM/ {print \$1}' | paste -sd, -); \ + if [ -z \"\$TIDS\" ]; then \ + echo 'Error: No KVM vCPU threads found.' >&2; \ + else \ + sudo '$HOST_PERF' sched record -t \"\$TIDS\" -o '$perf_sched_data_file_remote'; \ + fi; \ + exec bash" + $SSH_HOST_CMD "screen -dmS perf_sched_screen sudo bash -c \"$host_perf_sched_cmd\"" fi log_info "Starting CLIENT-side logging on $CLIENT_SSH_HOST..." @@ -474,6 +485,10 @@ for ((j = 0; j < NUM_RUNS; j += 1)); do host_loader_basename=$(basename "$EBPF_HOST_LOADER") $SSH_HOST_CMD "sudo pkill -SIGINT -f '$host_loader_basename'" fi + if [ "$PERF_TRACING_ENABLED" -eq 1 ]; then + $SSH_HOST_CMD "screen -X -S perf_kvm_screen quit" + $SSH_HOST_CMD "screen -X -S perf_sched_screen quit" + fi # --- Transfer Report Files from Remote Machines --- From c21faf6a684c9c4080f382357995ca2e4032431f Mon Sep 17 00:00:00 2001 From: Leshna Balara Date: Tue, 21 Oct 2025 09:37:55 -0500 Subject: [PATCH 11/18] disable turbo and hyperthreading in host --- utils/reset-host.sh | 10 ++++++++++ utils/setup-host.sh | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100755 utils/reset-host.sh mode change 100644 => 100755 utils/setup-host.sh diff --git a/utils/reset-host.sh b/utils/reset-host.sh new file mode 100755 index 000000000..f15e7de43 --- /dev/null +++ b/utils/reset-host.sh @@ -0,0 +1,10 @@ +SCRIPT_NAME="reset-host" + +CPUPOWER_PATH="/home/lbalara/linux-6.12.9/tools/power/cpupower" #TODO: HARDCODED + +LD_LIBRARY_PATH=$CPUPOWER_PATH $CPUPOWER_PATH/cpupower --cpu all frequency-set --governor ondemand +echo on > /sys/devices/system/cpu/smt/control +echo 1 > /proc/sys/kernel/numa_balancing +echo 0 > /sys/kernel/debug/tracing/tracing_on +echo 0 > /sys/kernel/debug/tracing/options/overwrite +echo 20000 > /sys/kernel/debug/tracing/buffer_size_kb \ No newline at end of file diff --git a/utils/setup-host.sh b/utils/setup-host.sh old mode 100644 new mode 100755 index 7b730aaff..d83516b3a --- a/utils/setup-host.sh +++ b/utils/setup-host.sh @@ -6,6 +6,12 @@ TCP_SOCKET_BUF_MB=1 ECN_ENABLED=1 HWPREF_ENABLED=1 RDMA=0 +LOCK_FREQ=1 +DISABLE_HYPER=1 +DISABLE_NUMA_BALANCE=1 +CPU_FREQ="2000MHz" +CPUPOWER_PATH="/home/lbalara/linux-6.12.9/tools/power/cpupower" #TODO: HARDCODED + help() { @@ -81,3 +87,18 @@ else modprobe msr wrmsr -a 0x1a4 1 fi + +if [ "$LOCK_FREQ" -eq 1 ]; then + log_info "Disabling turbo..." + LD_LIBRARY_PATH=$CPUPOWER_PATH $CPUPOWER_PATH/cpupower --cpu all frequency-set --freq $CPU_FREQ +fi + +if [ "$DISABLE_HYPER" -eq 1 ]; then + log_info "Disabling hyperthreading..." + echo off > /sys/devices/system/cpu/smt/control +fi + +if [ "$DISABLE_NUMA_BALANCE" -eq 1 ]; then + log_info "Disabling numa balance..." 
+ echo 0 > /proc/sys/kernel/numa_balancing +fi \ No newline at end of file From 4caae3c79b84b7b195bd949b77785ff9d492a8b7 Mon Sep 17 00:00:00 2001 From: Leshna Balara Date: Tue, 21 Oct 2025 09:46:20 -0500 Subject: [PATCH 12/18] fix cpupower path --- utils/reset-host.sh | 2 +- utils/setup-host.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/reset-host.sh b/utils/reset-host.sh index f15e7de43..dc9b36440 100755 --- a/utils/reset-host.sh +++ b/utils/reset-host.sh @@ -1,6 +1,6 @@ SCRIPT_NAME="reset-host" -CPUPOWER_PATH="/home/lbalara/linux-6.12.9/tools/power/cpupower" #TODO: HARDCODED +CPUPOWER_PATH="/home/lbalara/viommu/linux-6.12.9/tools/power/cpupower" #TODO: HARDCODED LD_LIBRARY_PATH=$CPUPOWER_PATH $CPUPOWER_PATH/cpupower --cpu all frequency-set --governor ondemand echo on > /sys/devices/system/cpu/smt/control diff --git a/utils/setup-host.sh b/utils/setup-host.sh index d83516b3a..23f68ff68 100755 --- a/utils/setup-host.sh +++ b/utils/setup-host.sh @@ -10,7 +10,7 @@ LOCK_FREQ=1 DISABLE_HYPER=1 DISABLE_NUMA_BALANCE=1 CPU_FREQ="2000MHz" -CPUPOWER_PATH="/home/lbalara/linux-6.12.9/tools/power/cpupower" #TODO: HARDCODED +CPUPOWER_PATH="/home/lbalara/viommu/linux-6.12.9/tools/power/cpupower" #TODO: HARDCODED help() From a68dc7a0da765ccd995023dca580ebb623c3f26e Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Tue, 21 Oct 2025 10:42:39 -0500 Subject: [PATCH 13/18] hacky fix for existing run --- scripts/vm-run-dctcp-tput-experiment.sh | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/scripts/vm-run-dctcp-tput-experiment.sh b/scripts/vm-run-dctcp-tput-experiment.sh index 3e069596a..a4ba211fc 100755 --- a/scripts/vm-run-dctcp-tput-experiment.sh +++ b/scripts/vm-run-dctcp-tput-experiment.sh @@ -292,7 +292,7 @@ cleanup() { $SSH_CLIENT_CMD \ 'sudo pkill -9 -f iperf; screen -wipe || true' $SSH_HOST_CMD \ - 'screen -ls | grep -E "\.host_session|\.perf_screen|\.logging_session_host" | cut -d. -f1 | xargs -r -I % screen -S % -X quit' + 'screen -ls | grep -E "\.host_session|\.perf_screen|\.perf_kvm_screen|\.perf_sched_screen|\.logging_session_host" | cut -d. -f1 | xargs -r -I % screen -S % -X quit' $SSH_HOST_CMD \ 'screen -wipe || true' @@ -336,7 +336,7 @@ for ((j = 0; j < NUM_RUNS; j += 1)); do guest_mlc_log_file="${current_guest_reports_dir}/mlc.log" perf_host_data_file_remote="${host_reports_dir_remote}/perf_host_cpu.data" perf_kvm_data_file_remote="${host_reports_dir_remote}/perf_host_kvm.data" - perf_sched_data_file_remot="${host_reports_dir_remote}/perf_host_sched.data" + perf_sched_data_file_remote="${host_reports_dir_remote}/perf_host_sched.data" iova_ftrace_host_output_file_remote="${host_reports_dir_remote}/iova_ftrace_host.txt" ebpf_host_stats="${host_reports_dir_remote}/ebpf_host_stats.csv" @@ -423,16 +423,9 @@ for ((j = 0; j < NUM_RUNS; j += 1)); do log_info "Starting HOST perf record (CPU profiling) on $HOST_IP..." 
host_perf_cmd="sudo '$HOST_PERF' record -F 99 -a -g --call-graph dwarf -o '$perf_host_data_file_remote' -- sleep '$PROFILING_LOGGING_DUR_S'; exec bash" $SSH_HOST_CMD "screen -dmS perf_screen sudo bash -c \"$host_perf_cmd\"" - host_perf_kvm_cmd="sudo '$HOST_PERF' kvm stat record -p \$(pidof qemu-system-x86_64 | tr ' ' ,) -o '$perf_kvm_data_file_remote'; exec bash" + host_perf_kvm_cmd="'$HOST_PERF' kvm stat record -p 7027 -o '$perf_kvm_data_file_remote'; exec bash" $SSH_HOST_CMD "screen -dmS perf_kvm_screen sudo bash -c \"$host_perf_kvm_cmd\"" - host_perf_sched_cmd="QPID=\$(pidof qemu-system-x86_64 | tr ' ' ,); \ - TIDS=\$(ps -T -p \"\$QPID\" -o tid=,comm= | awk '/CPU .*KVM/ {print \$1}' | paste -sd, -); \ - if [ -z \"\$TIDS\" ]; then \ - echo 'Error: No KVM vCPU threads found.' >&2; \ - else \ - sudo '$HOST_PERF' sched record -t \"\$TIDS\" -o '$perf_sched_data_file_remote'; \ - fi; \ - exec bash" + host_perf_sched_cmd="'$HOST_PERF' sched record -t 7058,7060,7061,7062,7063,7065,7066,7068,7069,7070,7073,7074,7075,7076,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090,7091,7092,7093,7094,7095 -o '$perf_sched_data_file_remote'; exec bash" $SSH_HOST_CMD "screen -dmS perf_sched_screen sudo bash -c \"$host_perf_sched_cmd\"" fi From 88dd8069a99e39d8273e486b38fdb20d782c6bef Mon Sep 17 00:00:00 2001 From: Leshna Balara Date: Thu, 23 Oct 2025 16:54:55 -0500 Subject: [PATCH 14/18] add host side scripts to simplify setup --- utils/perf-record-host.sh | 59 +++++++++++++++++++++++++++++++++++++++ utils/reset-host.sh | 4 ++- utils/setup-host.sh | 8 +++++- 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100755 utils/perf-record-host.sh diff --git a/utils/perf-record-host.sh b/utils/perf-record-host.sh new file mode 100755 index 000000000..480948e86 --- /dev/null +++ b/utils/perf-record-host.sh @@ -0,0 +1,59 @@ +SCRIPT_NAME="perf-record-host" + +# default values +DURATION=20 +PERF=/home/lbalara/viommu/linux-6.12.9/tools/perf/perf +GUEST_SRC=/home/lbalara/viommu/modified-linux-6.12.9 #TODO +EXP_NAME=unknown + + +# string literals +VM_COPY_DIR=temp +QEMU_PROCESS_NAME=qemu-system-x86_64 +CPU_DATA=perf_host_cpu.data +KVM_DATA=perf_host_kvm.data +KVM_STAT_DATA=perf.data.guest +SCHED_DATA=perf_host_sched.data +LOGS=perf_host.log + +help() +{ + echo "Usage: $SCRIPT_NAME + [ -d | --dur (duration in second) ] + [ -e | --exp (experiment name) ] + [ -h | --help ]" + exit 2 +} + +SHORT=d:,e:,h +LONG=dur:,exp:,help +PARSED_OPTS=$(getopt -a -n $SCRIPT_NAME --options $SHORT --longoptions $LONG -- "$@") + +VALID_ARGUMENTS=$# +if [ "$VALID_ARGUMENTS" -eq 0 ]; then + help +fi +eval set -- "$PARSED_OPTS" + +while :;do + case "$1" in + -d | --dur) DURATION="$2"; shift 2 ;; + -e | --exp) EXP_NAME="$2"; shift 2 ;; + -h | --help) help ;; + --) shift; break ;; + *) echo "Unexpected option: $1"; help ;; + esac +done + +log_info() { + echo "[INFO] - $1" +} + +QPID=$(pidof $QEMU_PROCESS_NAME) + +mkdir -p reports/${EXP_NAME} + +# $PERF record -F 99 -a -g --call-graph dwarf -o reports/${EXP_NAME}/${CPU_DATA} -- sleep $DURATION & +# $PERF kvm --guest --host --guestkallsyms=${VM_COPY_DIR}/kallsyms --guestmodules=${VM_COPY_DIR}/modules --guestvmlinux=$GUEST_SRC/vmlinux record -p $QPID -F 99 -o reports/${EXP_NAME}/${KVM_DATA} -- sleep $DURATION > reports/${EXP_NAME}/${LOGS} 2>&1 & +$PERF kvm stat record -p $QPID -o reports/${EXP_NAME}/${KVM_STAT_DATA} -- sleep $DURATION > reports/${EXP_NAME}/${LOGS} 2>&1 & +$PERF sched record -p $QPID -o reports/${EXP_NAME}/${SCHED_DATA} -- sleep $DURATION > 
reports/${EXP_NAME}/${LOGS} 2>&1 & diff --git a/utils/reset-host.sh b/utils/reset-host.sh index dc9b36440..33236dd3c 100755 --- a/utils/reset-host.sh +++ b/utils/reset-host.sh @@ -7,4 +7,6 @@ echo on > /sys/devices/system/cpu/smt/control echo 1 > /proc/sys/kernel/numa_balancing echo 0 > /sys/kernel/debug/tracing/tracing_on echo 0 > /sys/kernel/debug/tracing/options/overwrite -echo 20000 > /sys/kernel/debug/tracing/buffer_size_kb \ No newline at end of file +echo 20000 > /sys/kernel/debug/tracing/buffer_size_kb + +rm -rf temp/ \ No newline at end of file diff --git a/utils/setup-host.sh b/utils/setup-host.sh index 23f68ff68..e818f99de 100755 --- a/utils/setup-host.sh +++ b/utils/setup-host.sh @@ -11,7 +11,9 @@ DISABLE_HYPER=1 DISABLE_NUMA_BALANCE=1 CPU_FREQ="2000MHz" CPUPOWER_PATH="/home/lbalara/viommu/linux-6.12.9/tools/power/cpupower" #TODO: HARDCODED - +VM_USER=schai +VM_ADDR=192.168.122.53 +VM_KEY=/home/lbalara/.ssh/id_rsa help() { @@ -52,6 +54,10 @@ log_info() { echo "[INFO] - $1" } +mkdir -p temp +scp -i ${VM_KEY} ${VM_USER}@${VM_ADDR}:/proc/kallsyms temp/kallsyms +scp -i ${VM_KEY} ${VM_USER}@${VM_ADDR}:/proc/modules temp/modules + if [ "$RDMA" -eq 1 ]; then log_info "Configuring MTU according to RDMA supported values..." MTU=$(($MTU + 96)) From ae1b2ff5e83087f5fd56b02d5bf3a5636e1a2815 Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Thu, 23 Oct 2025 16:58:16 -0500 Subject: [PATCH 15/18] call host side script for perf measurement --- scripts/sosp24-experiments/vm_flows_exp.sh | 80 ++++++++++------------ scripts/vm-run-dctcp-tput-experiment.sh | 11 +-- 2 files changed, 37 insertions(+), 54 deletions(-) diff --git a/scripts/sosp24-experiments/vm_flows_exp.sh b/scripts/sosp24-experiments/vm_flows_exp.sh index 3605c89c5..970e43143 100755 --- a/scripts/sosp24-experiments/vm_flows_exp.sh +++ b/scripts/sosp24-experiments/vm_flows_exp.sh @@ -98,12 +98,13 @@ for socket_buf in 1; do for ring_buffer in 512; do # 5 10 20 40 for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32; do - num_cores=$i + #for i in 4; do + num_cores=$i client_cores_mask=($(echo $client_cores | tr ',' '\n' | head -n $num_cores | tr '\n' ',')) server_cores_mask=($(echo $server_cores | tr ',' '\n' | head -n $num_cores | tr '\n' ',')) format_i=$(printf "%02d\n" $i) - exp_name="static-${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-${ring_buffer}_sokcetbuf${socket_buf}_${num_cores}cores" + exp_name="${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-${ring_buffer}_sokcetbuf${socket_buf}_${num_cores}cores" echo $exp_name if [ "$DRY_RUN" -eq 1 ]; then @@ -118,8 +119,6 @@ for socket_buf in 1; do -e "$exp_name" -m 4000 -r $ring_buffer -b "100g" -d 1\ --socket-buf $socket_buf --mlc-cores 'none' --runs 3 - # > /dev/null 2>&1 - #sudo bash run-dctcp-tput-experiment.sh -E $exp_name -M 4000 --num_servers $i --num_clients $i -c "4" -m "20" --ring_buffer 256 --buf 1 --mlc_cores 'none' --bandwidth "100g" --server_intf $server_intf --client_intf $client_intf python3 report-tput-metrics.py $exp_name tput,drops,acks,iommu,cpu | sudo tee ../utils/reports/$exp_name/summary.txt echo $PWD cd ../utils/reports/$exp_name @@ -129,49 +128,42 @@ for socket_buf in 1; do cd - sudo chmod +666 -R ../utils/reports/$exp_name - # python sosp24-experiments/plot_iova_logging.py \ - # --exp_folder "../utils/reports/$exp_name" \ - # --log_file "iova.log" done done done # Temporary loop to measure impact of core randomization -for random in 1 2 3; do +# +#for random in 1 2; do # 5 10 20 40 
- for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32; do - num_cores=$i - client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) - server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) - - format_i=$(printf "%02d\n" $i) - exp_name="rand-${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-512_sokcetbuf1_${num_cores}cores" - echo $exp_name - - if [ "$DRY_RUN" -eq 1 ]; then - continue - fi - - sudo bash vm-run-dctcp-tput-experiment.sh \ - --guest-home "$GUEST_HOME" --guest-ip "$GUEST_IP" --guest-intf "$GUEST_INTF" --guest-bus "$GUEST_NIC_BUS" -n "$i" -c $server_cores_mask \ - --client-home "$CLIENT_HOME" --client-ip "$CLIENT_IP" --client-intf "$CLIENT_INTF" -N "$i" -C $client_cores_mask \ - --host-home "$HOST_HOME" --host-ip "$HOST_IP" \ - --client-ssh-name "$CLIENT_SSH_UNAME" --client-ssh-pass "$CLIENT_SSH_PASSWORD" --client-ssh-host "$CLIENT_SSH_HOST" --client-ssh-use-pass "$CLIENT_USE_PASS_AUTH" --client-ssh-ifile "$CLIENT_SSH_IDENTITY_FILE" \ - -e "$exp_name" -m 4000 -r 512 -b "100g" -d 1\ - --socket-buf 1 --mlc-cores 'none' --runs 1 - - # > /dev/null 2>&1 - #sudo bash run-dctcp-tput-experiment.sh -E $exp_name -M 4000 --num_servers $i --num_clients $i -c "4" -m "20" --ring_buffer 256 --buf 1 --mlc_cores 'none' --bandwidth "100g" --server_intf $server_intf --client_intf $client_intf - python3 report-tput-metrics.py $exp_name tput,drops,acks,iommu,cpu | sudo tee ../utils/reports/$exp_name/summary.txt - echo $PWD - cd ../utils/reports/$exp_name - - sudo bash -c "cat /sys/kernel/debug/tracing/trace > iova.log" - cd - - sudo chmod +666 -R ../utils/reports/$exp_name - - # python sosp24-experiments/plot_iova_logging.py \ - # --exp_folder "../utils/reports/$exp_name" \ - # --log_file "iova.log" - done -done +# for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32; do +# num_cores=$i +# client_cores_mask=($(echo $client_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) +# server_cores_mask=($(echo $server_cores | tr ',' '\n' | shuf -n $num_cores | tr '\n' ',')) + +# format_i=$(printf "%02d\n" $i) +# exp_name="rand${random}-${timestamp}-$(uname -r)-flow${format_i}-${iommu_config}-ringbuf-512_sokcetbuf1_${num_cores}cores" +# echo $exp_name + +# if [ "$DRY_RUN" -eq 1 ]; then +# continue +# fi + +# sudo bash vm-run-dctcp-tput-experiment.sh \ +# --guest-home "$GUEST_HOME" --guest-ip "$GUEST_IP" --guest-intf "$GUEST_INTF" --guest-bus "$GUEST_NIC_BUS" -n "$i" -c $server_cores_mask \ +# --client-home "$CLIENT_HOME" --client-ip "$CLIENT_IP" --client-intf "$CLIENT_INTF" -N "$i" -C $client_cores_mask \ +# --host-home "$HOST_HOME" --host-ip "$HOST_IP" \ +# --client-ssh-name "$CLIENT_SSH_UNAME" --client-ssh-pass "$CLIENT_SSH_PASSWORD" --client-ssh-host "$CLIENT_SSH_HOST" --client-ssh-use-pass "$CLIENT_USE_PASS_AUTH" --client-ssh-ifile "$CLIENT_SSH_IDENTITY_FILE" \ +# -e "$exp_name" -m 4000 -r 512 -b "100g" -d 1\ +# --socket-buf 1 --mlc-cores 'none' --runs 1 + +# python3 report-tput-metrics.py $exp_name tput,drops,acks,iommu,cpu | sudo tee ../utils/reports/$exp_name/summary.txt +# echo $PWD +# cd ../utils/reports/$exp_name + +# sudo bash -c "cat /sys/kernel/debug/tracing/trace > iova.log" +# cd - +# sudo chmod +666 -R ../utils/reports/$exp_name + +# done +#done diff --git a/scripts/vm-run-dctcp-tput-experiment.sh b/scripts/vm-run-dctcp-tput-experiment.sh index a4ba211fc..dbec35cd7 100755 --- a/scripts/vm-run-dctcp-tput-experiment.sh 
+++ b/scripts/vm-run-dctcp-tput-experiment.sh @@ -421,12 +421,8 @@ for ((j = 0; j < NUM_RUNS; j += 1)); do log_info "Starting GUEST perf record (CPU profiling)..." sudo "$GUEST_PERF" record -F 99 -a -g --call-graph dwarf -o "$perf_guest_data_file" -- sleep "$PROFILING_LOGGING_DUR_S" & log_info "Starting HOST perf record (CPU profiling) on $HOST_IP..." - host_perf_cmd="sudo '$HOST_PERF' record -F 99 -a -g --call-graph dwarf -o '$perf_host_data_file_remote' -- sleep '$PROFILING_LOGGING_DUR_S'; exec bash" + host_perf_cmd="cd '$HOST_SETUP_DIR'; sudo bash perf-record-host.sh -d '$PROFILING_LOGGING_DUR_S' -e '${EXP_NAME}-RUN-${j}'; exec bash" $SSH_HOST_CMD "screen -dmS perf_screen sudo bash -c \"$host_perf_cmd\"" - host_perf_kvm_cmd="'$HOST_PERF' kvm stat record -p 7027 -o '$perf_kvm_data_file_remote'; exec bash" - $SSH_HOST_CMD "screen -dmS perf_kvm_screen sudo bash -c \"$host_perf_kvm_cmd\"" - host_perf_sched_cmd="'$HOST_PERF' sched record -t 7058,7060,7061,7062,7063,7065,7066,7068,7069,7070,7073,7074,7075,7076,7078,7079,7080,7081,7082,7083,7084,7085,7086,7087,7088,7089,7090,7091,7092,7093,7094,7095 -o '$perf_sched_data_file_remote'; exec bash" - $SSH_HOST_CMD "screen -dmS perf_sched_screen sudo bash -c \"$host_perf_sched_cmd\"" fi log_info "Starting CLIENT-side logging on $CLIENT_SSH_HOST..." @@ -478,11 +474,6 @@ for ((j = 0; j < NUM_RUNS; j += 1)); do host_loader_basename=$(basename "$EBPF_HOST_LOADER") $SSH_HOST_CMD "sudo pkill -SIGINT -f '$host_loader_basename'" fi - if [ "$PERF_TRACING_ENABLED" -eq 1 ]; then - $SSH_HOST_CMD "screen -X -S perf_kvm_screen quit" - $SSH_HOST_CMD "screen -X -S perf_sched_screen quit" - fi - # --- Transfer Report Files from Remote Machines --- log_info "Transferring report files from CLIENT and HOST..." From 5f2f6babbe27e35bcaa84eb5b82ce13a52ade89a Mon Sep 17 00:00:00 2001 From: Leshna Balara Date: Fri, 24 Oct 2025 00:29:20 -0500 Subject: [PATCH 16/18] add hooks for tracing writel --- tracing/guest_loader.c | 4 ++++ tracing/guest_tracer.bpf.c | 24 ++++++++++++++++++++++++ tracing/tracing_utils.h | 2 ++ 3 files changed, 30 insertions(+) diff --git a/tracing/guest_loader.c b/tracing/guest_loader.c index a5ae95e93..fc27a341f 100644 --- a/tracing/guest_loader.c +++ b/tracing/guest_loader.c @@ -150,6 +150,10 @@ probe_def_t probes_to_attach[] = { {"kretprobe_page_pool_put_unrefed_netmem", "page_pool_put_unrefed_netmem", PROBE_TYPE_KRETPROBE, PAGE_POOL_PUT_NETMEM,NULL}, {"kprobe_page_pool_put_unrefed_page", "page_pool_put_unrefed_page", PROBE_TYPE_KPROBE, PAGE_POOL_PUT_PAGE,NULL}, {"kretprobe_page_pool_put_unrefed_page", "page_pool_put_unrefed_page", PROBE_TYPE_KRETPROBE, PAGE_POOL_PUT_PAGE,NULL}, + {"kprobe_writel_wrapper", "writel_wrapper", PROBE_TYPE_KPROBE, WRITEL_WRAPPER,NULL}, + {"kretprobe_writel_wrapper", "writel_wrapper", PROBE_TYPE_KRETPROBE, WRITEL_WRAPPER,NULL}, + {"kprobe_after_writel_while_wrapper", "after_writel_while_wrapper", PROBE_TYPE_KPROBE, AFTER_WRITEL_WHILE_WRAPPER,NULL}, + {"kretprobe_after_writel_while_wrapper", "after_writel_while_wrapper", PROBE_TYPE_KRETPROBE, AFTER_WRITEL_WHILE_WRAPPER,NULL}, // --- Additions for count functions --- {"kprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "mlx5_core:count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, {"kretprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook", "mlx5_core:count_mlx5e_alloc_rx_mpwqe_perpage_hook", PROBE_TYPE_KRETPROBE, COUNT_MLX5E_RX_MPWQE_PER_PAGE,"mlx5_core"}, diff --git a/tracing/guest_tracer.bpf.c b/tracing/guest_tracer.bpf.c index 
6919d99ad..243516f97 100644 --- a/tracing/guest_tracer.bpf.c +++ b/tracing/guest_tracer.bpf.c @@ -450,6 +450,30 @@ int BPF_KRETPROBE(kretprobe_trace_iommu_flush_write_buffer_lock_wrapper, void *r return _bpf_utils_trace_func_exit(ctx, GUEST, false); } +SEC("kprobe/writel_wrapper") +int BPF_KPROBE(kprobe_writel_wrapper, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/writel_wrapper") +int BPF_KRETPROBE(kretprobe_writel_wrapper, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + +SEC("kprobe/after_writel_while_wrapper") +int BPF_KPROBE(kprobe_after_writel_while_wrapper, void *ret) +{ + return _bpf_utils_trace_func_entry(ctx); +} + +SEC("kretprobe/after_writel_while_wrapper") +int BPF_KRETPROBE(kretprobe_after_writel_while_wrapper, void *ret) +{ + return _bpf_utils_trace_func_exit(ctx, GUEST, false); +} + SEC("kprobe/count_mlx5e_alloc_rx_mpwqe_perpage_hook") int BPF_KPROBE(kprobe_count_mlx5e_alloc_rx_mpwqe_perpage_hook, void *ret) { diff --git a/tracing/tracing_utils.h b/tracing/tracing_utils.h index 40392b4a2..330bd39d4 100644 --- a/tracing/tracing_utils.h +++ b/tracing/tracing_utils.h @@ -53,6 +53,8 @@ enum FunctionName PAGE_POOL_PUT_PAGE, QEMU_VTD_FETCH_INV_DESC, IOMMUFD_FOPS_IOCTL, + WRITEL_WRAPPER, + AFTER_WRITEL_WHILE_WRAPPER, TRACE_FUNCS_END, // Marks the end of trace functions // --- Section for Simple Frequency Counting --- From 275bee414cb43dff6dbc88ddc151ecc7d3d87626 Mon Sep 17 00:00:00 2001 From: leshnabalara Date: Sun, 2 Nov 2025 19:12:44 -0600 Subject: [PATCH 17/18] Start ftace logging at steady state --- scripts/vm-run-dctcp-tput-experiment.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/vm-run-dctcp-tput-experiment.sh b/scripts/vm-run-dctcp-tput-experiment.sh index dbec35cd7..14a868ae0 100755 --- a/scripts/vm-run-dctcp-tput-experiment.sh +++ b/scripts/vm-run-dctcp-tput-experiment.sh @@ -416,10 +416,29 @@ for ((j = 0; j < NUM_RUNS; j += 1)); do sleep 2 # Allow eBPF loaders to initialize fi + # --- Ftrace Setup (Guest & Host) --- + log_info "Configuring GUEST ftrace for IOVA logging (Buffer: ${FTRACE_BUFFER_SIZE_KB}KB, Overwrite: ${FTRACE_OVERWRITE_ON_FULL})..." + sudo echo "$FTRACE_BUFFER_SIZE_KB" > /sys/kernel/debug/tracing/buffer_size_kb + sudo echo "$FTRACE_OVERWRITE_ON_FULL" > /sys/kernel/debug/tracing/options/overwrite + sudo echo > /sys/kernel/debug/tracing/trace # Clear buffer + sudo echo 1 > /sys/kernel/debug/tracing/tracing_on + log_info "GUEST IOVA ftrace is ON." + + log_info "Configuring HOST ftrace for IOVA logging on $HOST_IP..." + $SSH_HOST_CMD \ + "sudo bash -c 'sudo echo '$FTRACE_BUFFER_SIZE_KB' > /sys/kernel/debug/tracing/buffer_size_kb; \ + sudo echo '$FTRACE_OVERWRITE_ON_FULL' > /sys/kernel/debug/tracing/options/overwrite; \ + sudo echo > /sys/kernel/debug/tracing/trace; \ + sudo echo 1 > /sys/kernel/debug/tracing/tracing_on'" + log_info "HOST IOVA ftrace is ON." + # --- Start Main Profiling & Logging Phase --- if [ "$PERF_TRACING_ENABLED" -eq 1 ]; then log_info "Starting GUEST perf record (CPU profiling)..." sudo "$GUEST_PERF" record -F 99 -a -g --call-graph dwarf -o "$perf_guest_data_file" -- sleep "$PROFILING_LOGGING_DUR_S" & + fi + + if [ "$PERF_TRACING_HOST_ENABLED" -eq 1 ]; then log_info "Starting HOST perf record (CPU profiling) on $HOST_IP..." 
host_perf_cmd="cd '$HOST_SETUP_DIR'; sudo bash perf-record-host.sh -d '$PROFILING_LOGGING_DUR_S' -e '${EXP_NAME}-RUN-${j}'; exec bash" $SSH_HOST_CMD "screen -dmS perf_screen sudo bash -c \"$host_perf_cmd\"" From feeb12426753305383f7de65c22ebf2369830802 Mon Sep 17 00:00:00 2001 From: Siyuan Chai Date: Sun, 2 Nov 2025 19:16:56 -0600 Subject: [PATCH 18/18] Add reset guest kernel utils --- utils/reset-guest-kernel-modules.sh | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100755 utils/reset-guest-kernel-modules.sh diff --git a/utils/reset-guest-kernel-modules.sh b/utils/reset-guest-kernel-modules.sh new file mode 100755 index 000000000..cb5e014d8 --- /dev/null +++ b/utils/reset-guest-kernel-modules.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +set -euo pipefail + +# ========================================================= +# Sync kernel modules from host into guest +# Siyuan's auto-reset script (runs on boot with systemd) +# ========================================================= + +KERNEL_NAME=$(uname -r) +HOST_USER="siyuanc3" +HOST_IP="192.168.122.1" +SSH_PORT=22 +IDENTITY_FILE="/home/schai/.ssh/id_rsa" + +SSH_OPTS="-i ${IDENTITY_FILE} -o StrictHostKeyChecking=accept-new" + +TMP_TARBALL="/tmp/modules-${KERNEL_NAME}.tar.gz" +REMOTE_TARBALL="/tmp/modules-${KERNEL_NAME}.tar.gz" +DST_DIR="/lib/modules/${KERNEL_NAME}" + +echo "[1/5] Requesting host to package modules..." +ssh -p "$SSH_PORT" $SSH_OPTS "${HOST_USER}@${HOST_IP}" \ + "sudo rm -f ${REMOTE_TARBALL} && sudo tar czf ${REMOTE_TARBALL} -C /lib/modules/${KERNEL_NAME} ." + +echo "[2/5] Copying archive from host:${HOST_IP} ..." +scp -P "$SSH_PORT" $SSH_OPTS "${HOST_USER}@${HOST_IP}:${REMOTE_TARBALL}" "$TMP_TARBALL" + +echo "[3/5] Installing into ${DST_DIR} ..." +sudo mkdir -p "$DST_DIR" +sudo tar xzf "$TMP_TARBALL" -C "$DST_DIR" --numeric-owner + +echo "[4/5] Backing up DKMS updates (if any) ..." +if [[ -d "${DST_DIR}/updates/dkms" ]]; then + sudo mkdir -p "/tmp/dkms_backup/${KERNEL_NAME}" + sudo sh -c "mv ${DST_DIR}/updates/dkms/* /tmp/dkms_backup/${KERNEL_NAME}/ || true" +fi + +echo "[5/5] Running depmod ..." +sudo depmod -a "$KERNEL_NAME" + +# Optional cleanup on host and guest +echo "[cleanup] Removing temporary archives..." +ssh -p "$SSH_PORT" $SSH_OPTS "${HOST_USER}@${HOST_IP}" "sudo rm -f ${REMOTE_TARBALL}" || true +sudo rm -f "$TMP_TARBALL" || true + +echo "✅ Done. Try: sudo modprobe msr"