From 2b64d7536e830506de43a2d376812dd177076af9 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Fri, 18 Oct 2024 17:40:37 +0300 Subject: [PATCH] ci: Run benchmarks with Ethernet and max MTUs (#2183) * ci: Run benchmarks with Ethernet and max MTUs * Fixes * Export * Try with len that is multiple of 8 * 2000 * 1600 * Try more values * Again * 1488 * 1504 it is for now --- .github/workflows/bench.yml | 79 +++++++++++++++++------------- neqo-bin/benches/main.rs | 10 ++-- neqo-transport/benches/transfer.rs | 10 ++-- 3 files changed, 56 insertions(+), 43 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index fcece8bd42..b2e7740cd0 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -87,8 +87,10 @@ jobs: # # Run all benchmarks at elevated priority. taskset -c 0 nice -n -20 cargo "+$TOOLCHAIN" bench --workspace --exclude neqo-bin --features bench -- --noplot | tee results.txt - nice -n -20 cargo "+$TOOLCHAIN" bench --package neqo-bin --features bench -- --noplot | tee -a results.txt - + for MTU in 1500 65536; do + sudo ip link set dev lo mtu "$MTU" + MTU=$MTU nice -n -20 cargo "+$TOOLCHAIN" bench --package neqo-bin --features bench -- --noplot | tee -a results.txt + done # Compare various configurations of neqo against msquic, and gather perf data # during the hyperfine runs. @@ -132,37 +134,41 @@ jobs: fi } - for server in msquic neqo; do - for client in msquic neqo; do - # msquic doesn't let us configure the congestion control or pacing. - if [ "$client" == "msquic" ] && [ "$server" == "msquic" ]; then - cc_opt=("") - pacing_opt=("") - else - cc_opt=("reno" "cubic") - pacing_opt=("on" "") - fi - for cc in "${cc_opt[@]}"; do - for pacing in "${pacing_opt[@]}"; do - # Make a tag string for this test, for the results. 
- TAG="$client,$server,$cc,$pacing" - echo "Running benchmarks for $TAG" | tee -a comparison.txt - transmogrify "${server_cmd[$server]}" "$cc" "$pacing" - # shellcheck disable=SC2086 - taskset -c 0 nice -n -20 \ - perf $PERF_OPT -o "$client-$server$EXT.server.perf" $CMD & - PID=$! - transmogrify "${client_cmd[$client]}" "$cc" "$pacing" - # shellcheck disable=SC2086 - taskset -c 1 nice -n -20 \ - perf $PERF_OPT -o "$client-$server$EXT.client.perf" \ - hyperfine -N --output null -w 1 -s "sleep 1" -n "$TAG" -u millisecond --export-markdown step.md "$CMD" | - tee -a comparison.txt - echo >> comparison.txt - kill $PID - cat step.md >> steps.md - # Sanity check the size of the last retrieved file. - [ "$(wc -c <"$SIZE")" -eq "$SIZE" ] || exit 1 + # See https://github.com/microsoft/msquic/issues/4618#issuecomment-2422611592 + for mtu in 1504 65536; do + sudo ip link set dev lo mtu "$mtu" + for server in msquic neqo; do + for client in msquic neqo; do + # msquic doesn't let us configure the congestion control or pacing. + if [ "$client" == "msquic" ] && [ "$server" == "msquic" ]; then + cc_opt=("") + pacing_opt=("") + else + cc_opt=("reno" "cubic") + pacing_opt=("on" "") + fi + for cc in "${cc_opt[@]}"; do + for pacing in "${pacing_opt[@]}"; do + # Make a tag string for this test, for the results. + TAG="$client,$server,$cc,$pacing,$mtu" + echo "Running benchmarks for $TAG" | tee -a comparison.txt + transmogrify "${server_cmd[$server]}" "$cc" "$pacing" + # shellcheck disable=SC2086 + taskset -c 0 nice -n -20 \ + perf $PERF_OPT -o "$client-$server$EXT.server.perf" $CMD & + PID=$! 
+ transmogrify "${client_cmd[$client]}" "$cc" "$pacing" + # shellcheck disable=SC2086 + taskset -c 1 nice -n -20 \ + perf $PERF_OPT -o "$client-$server$EXT.client.perf" \ + hyperfine -N --output null -w 1 -s "sleep 1" -n "$TAG" -u millisecond --export-markdown step.md "$CMD" | + tee -a comparison.txt + echo >> comparison.txt + kill $PID + cat step.md >> steps.md + # Sanity check the size of the last retrieved file. + [ "$(wc -c <"$SIZE")" -eq "$SIZE" ] || exit 1 + done done done done @@ -170,12 +176,15 @@ jobs: # Merge the results tables generated by hyperfine into a single table. echo "Transfer of $SIZE bytes over loopback." > comparison.md awk '(!/^\| Command/ || !c++) && (!/^\|:/ || !d++)' < steps.md |\ - sed -E 's/`//g; s/^\|:/\|:---\|:---\|:---\|:/g; s/,/ \| /g; s/^\| Command/\| Client \| Server \| CC \| Pacing/g' >> comparison.md + sed -E 's/`//g; s/^\|:/\|:---\|:---\|:---\|:---\|:/g; s/,/ \| /g; s/^\| Command/\| Client \| Server \| CC \| Pacing \| MTU/g' >> comparison.md rm -r "$TMP" # Re-enable turboboost, hyperthreading and use powersave governor. - name: Restore machine - run: sudo /root/bin/unprep.sh + run: | + sudo /root/bin/unprep.sh + # In case the previous test failed: + sudo ip link set dev lo mtu 65536 if: success() || failure() || cancelled() - name: Post-process perf data diff --git a/neqo-bin/benches/main.rs b/neqo-bin/benches/main.rs index 4237c13408..8793bf0928 100644 --- a/neqo-bin/benches/main.rs +++ b/neqo-bin/benches/main.rs @@ -4,7 +4,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use std::{path::PathBuf, str::FromStr}; +use std::{env, path::PathBuf, str::FromStr}; use criterion::{criterion_group, criterion_main, BatchSize, Criterion, Throughput}; use neqo_bin::{client, server}; @@ -20,18 +20,18 @@ fn transfer(c: &mut Criterion) { neqo_crypto::init_db(PathBuf::from_str("../test-fixture/db").unwrap()).unwrap(); let done_sender = spawn_server(); - + let mtu = env::var("MTU").map_or_else(|_| String::new(), |mtu| format!("/mtu-{mtu}")); for Benchmark { name, requests } in [ Benchmark { - name: "1-conn/1-100mb-resp (aka. Download)".to_string(), + name: format!("1-conn/1-100mb-resp{mtu} (aka. Download)"), requests: vec![100 * 1024 * 1024], }, Benchmark { - name: "1-conn/10_000-parallel-1b-resp (aka. RPS)".to_string(), + name: format!("1-conn/10_000-parallel-1b-resp{mtu} (aka. RPS)"), requests: vec![1; 10_000], }, Benchmark { - name: "1-conn/1-1b-resp (aka. HPS)".to_string(), + name: format!("1-conn/1-1b-resp{mtu} (aka. HPS)"), requests: vec![1; 1], }, ] { diff --git a/neqo-transport/benches/transfer.rs b/neqo-transport/benches/transfer.rs index be4876cc9e..f95d26e371 100644 --- a/neqo-transport/benches/transfer.rs +++ b/neqo-transport/benches/transfer.rs @@ -21,7 +21,7 @@ const ZERO: Duration = Duration::from_millis(0); const JITTER: Duration = Duration::from_millis(10); const TRANSFER_AMOUNT: usize = 1 << 22; // 4Mbyte -fn benchmark_transfer(c: &mut Criterion, label: &str, seed: &Option<impl AsRef<str>>) { +fn benchmark_transfer(c: &mut Criterion, label: &str, seed: Option<&impl AsRef<str>>) { for pacing in [false, true] { let mut group = c.benchmark_group(format!("transfer/pacing-{pacing}")); // Don't let criterion calculate throughput, as that's based on wall-clock time, not @@ -63,14 +63,18 @@ fn benchmark_transfer(c: &mut Criterion, label: &str, seed: &Option