diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index c8faecfcb..63809fe04 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -1,6 +1,6 @@
 name: CI
 on:
-  push: { branches: [master] }
+  push: { branches: [main] }
   pull_request:
 
 concurrency:
@@ -247,13 +247,13 @@ jobs:
     - name: Set up dependencies
       run: |
         sudo apt-get update
-        sudo apt-get install -y valgrind gdb libc6-dbg # Needed for iai-callgrind
+        sudo apt-get install -y valgrind gdb libc6-dbg # Needed for gungraun
         rustup update "$BENCHMARK_RUSTC" --no-self-update
         rustup default "$BENCHMARK_RUSTC"
-        # Install the version of iai-callgrind-runner that is specified in Cargo.toml
-        iai_version="$(cargo metadata --format-version=1 --features icount |
-           jq -r '.packages[] | select(.name == "iai-callgrind").version')"
-        cargo binstall -y iai-callgrind-runner --version "$iai_version"
+        # Install the gungraun-runner version that matches the gungraun dependency in Cargo.toml
+        gungraun_version="$(cargo metadata --format-version=1 --features icount |
+           jq -r '.packages[] | select(.name == "gungraun").version')"
+        cargo binstall -y gungraun-runner --version "$gungraun_version"
         sudo apt-get install valgrind
     - uses: Swatinem/rust-cache@v2
       with:
diff --git a/.gitignore b/.gitignore
index f12b871c2..abe346659 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ compiler-rt
 # Benchmark cache
 baseline-*
 iai-home
+gungraun-home
 
 # Temporary files
 *.bk
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9ae4f893c..f74d3f8ba 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -150,8 +150,8 @@ cargo bench --no-default-features \
 ```
 
 There are also benchmarks that check instruction count behind the `icount`
-feature. These require [`iai-callgrind-runner`] (via Cargo) and [Valgrind]
-to be installed, which means these only run on limited platforms.
+feature. These require [`gungraun-runner`] (via Cargo) and [Valgrind] to be
+installed, which means these only run on limited platforms.
 
 Instruction count benchmarks are run as part of CI to flag performance
 regresions.
@@ -163,7 +163,7 @@ cargo bench --no-default-features \
     --bench icount --bench mem_icount
 ```
 
-[`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner
+[`gungraun-runner`]: https://crates.io/crates/gungraun-runner
 [Valgrind]: https://valgrind.org/
 
 ## Subtree synchronization
diff --git a/Cargo.toml b/Cargo.toml
index 956d738f3..6b4e691a1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -51,5 +51,5 @@ codegen-units = 1
 lto = "fat"
 
 [profile.bench]
-# Required for iai-callgrind
+# Required for gungraun
 debug = true
diff --git a/PUBLISHING.md b/PUBLISHING.md
index 3df682ab0..c52191064 100644
--- a/PUBLISHING.md
+++ b/PUBLISHING.md
@@ -5,7 +5,7 @@ It's not great, but it works for now. PRs to improve this process would be
 greatly appreciated!
 
 1. Make sure you've got a clean working tree and it's updated with the latest
-   changes on `master`
+   changes on `main`
 2. Edit `Cargo.toml` to bump the version number
 3. Commit this change
 4. Run `git tag` to create a tag for this version
diff --git a/builtins-test/Cargo.toml b/builtins-test/Cargo.toml
index 00a9d8579..2bcffe349 100644
--- a/builtins-test/Cargo.toml
+++ b/builtins-test/Cargo.toml
@@ -14,7 +14,7 @@ rand_xoshiro = "0.7"
 # To compare float builtins against
 rustc_apfloat = "0.2.3"
 # Really a dev dependency, but dev dependencies can't be optional
-iai-callgrind = { version = "0.15.2", optional = true }
+gungraun = { version = "0.17.0", optional = true }
 
 [dependencies.compiler_builtins]
 path = "../builtins-shim"
@@ -46,8 +46,8 @@ no-sys-f16-f64-convert = []
 # Skip tests that rely on f16 symbols being available on the system
 no-sys-f16 = ["no-sys-f16-f64-convert"]
 
-# Enable icount benchmarks (requires iai-callgrind and valgrind)
-icount = ["dep:iai-callgrind"]
+# Enable icount benchmarks (requires gungraun-runner and valgrind locally)
+icount = ["dep:gungraun"]
 
 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
diff --git a/builtins-test/benches/mem_icount.rs b/builtins-test/benches/mem_icount.rs
index bd88cf80c..966ceea86 100644
--- a/builtins-test/benches/mem_icount.rs
+++ b/builtins-test/benches/mem_icount.rs
@@ -1,11 +1,11 @@
-//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This
+//! Benchmarks that use Callgrind (via `gungraun`) to report instruction count metrics. This
 //! is stable enough to be tested in CI.
 
 use std::hint::black_box;
 use std::{ops, slice};
 
 use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
-use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use gungraun::{library_benchmark, library_benchmark_group, main};
 
 const PAGE_SIZE: usize = 0x1000; // 4 kiB
 const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day
diff --git a/ci/bench-icount.sh b/ci/bench-icount.sh
index 12228b9da..56aa1df07 100755
--- a/ci/bench-icount.sh
+++ b/ci/bench-icount.sh
@@ -10,14 +10,18 @@ if [ -z "$target" ]; then
     target="$host_target"
 fi
 
-iai_home="iai-home"
+gungraun_home="gungraun-home"
 
 # Use the arch as a tag to disambiguate artifacts
 tag="$(echo "$target" | cut -d'-' -f1)"
 
-# Download the baseline from master
+# Download the baseline from main
 ./ci/ci-util.py locate-baseline --download --extract --tag "$tag"
 
+# FIXME: baselines saved before the gungraun migration extract to `iai-home`;
+# rename it so they are reused. Can be dropped after the first gungraun run.
+[ -d "iai-home" ] && mv "iai-home" "$gungraun_home"
+
 # Run benchmarks once
 function run_icount_benchmarks() {
     cargo_args=(
@@ -26,19 +30,19 @@ function run_icount_benchmarks() {
         "--features" "unstable,unstable-float,icount"
     )
 
-    iai_args=(
-        "--home" "$(pwd)/$iai_home"
-        "--callgrind-limits=ir=5.0"
+    gungraun_args=(
+        "--home" "$(pwd)/$gungraun_home"
+        "--callgrind-limits=ir=5.0%"
         "--save-summary"
     )
 
-    # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax
-    parsing_iai_args=0
+    # Parse `cargo_arg0 cargo_arg1 -- gungraun_arg0 gungraun_arg1` syntax
+    parsing_gungraun_args=0
     while [ "$#" -gt 0 ]; do
-        if [ "$parsing_iai_args" == "1" ]; then
-            iai_args+=("$1")
+        if [ "$parsing_gungraun_args" == "1" ]; then
+            gungraun_args+=("$1")
         elif [ "$1" == "--" ]; then
-            parsing_iai_args=1
+            parsing_gungraun_args=1
         else
             cargo_args+=("$1")
         fi
@@ -46,9 +50,9 @@ function run_icount_benchmarks() {
         shift
     done
 
-    # Run iai-callgrind benchmarks. Do this in a subshell with `&& true` to
-    # capture rather than exit on error.
-    (cargo bench "${cargo_args[@]}" -- "${iai_args[@]}") && true
+    # Run gungraun benchmarks. Do this in a subshell with `&& true` to capture
+    # rather than exit on error.
+    (cargo bench "${cargo_args[@]}" -- "${gungraun_args[@]}") && true
     exit_code="$?"
 
     if [ "$exit_code" -eq 0 ]; then
@@ -68,4 +72,4 @@ run_icount_benchmarks -- --save-baseline=hardfloat
 # Name and tar the new baseline
 name="baseline-icount-$tag-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
 echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
-tar cJf "$name.tar.xz" "$iai_home"
+tar cJf "$name.tar.xz" "$gungraun_home"
diff --git a/ci/ci-util.py b/ci/ci-util.py
index c1db17c6c..113820b70 100755
--- a/ci/ci-util.py
+++ b/ci/ci-util.py
@@ -38,7 +38,7 @@
             `--tag` can be specified to look for artifacts with a specific tag, such as
             for a specific architecture.
 
-            Note that `--extract` will overwrite files in `iai-home`.
+            Note that `--extract` will overwrite files in `gungraun-home`.
 
         handle-bench-regressions PR_NUMBER
             Exit with success if the pull request contains a line starting with
@@ -49,7 +49,7 @@
 
 REPO_ROOT = Path(__file__).parent.parent
 GIT = ["git", "-C", REPO_ROOT]
-DEFAULT_BRANCH = "master"
+DEFAULT_BRANCH = "main"
 WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
 ARTIFACT_PREFIX = "baseline-icount*"
 
@@ -186,7 +186,7 @@ def __init__(self) -> None:
 
     def _init_change_list(self):
         """Create a list of files that have been changed. This uses GITHUB_REF if
-        available, otherwise a diff between `HEAD` and `master`.
+        available, otherwise a diff between `HEAD` and `main`.
         """
 
         # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being
diff --git a/libm-test/Cargo.toml b/libm-test/Cargo.toml
index 0af6b0c1d..adecfc1af 100644
--- a/libm-test/Cargo.toml
+++ b/libm-test/Cargo.toml
@@ -21,8 +21,8 @@ build-musl = ["dep:musl-math-sys"]
 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
 
-# Enable icount benchmarks (requires iai-callgrind and valgrind)
-icount = ["dep:iai-callgrind"]
+# Enable icount benchmarks (requires gungraun-runner and valgrind locally)
+icount = ["dep:gungraun"]
 
 # Run with a reduced set of benchmarks, such as for CI
 short-benchmarks = []
@@ -31,7 +31,7 @@ short-benchmarks = []
 anyhow = "1.0.98"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false }
-iai-callgrind = { version = "0.15.2", optional = true }
+gungraun = { version = "0.17.0", optional = true }
 indicatif = { version = "0.18.0", default-features = false }
 libm = { path = "../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../crates/libm-macros" }
diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs
index 0b8577122..fb856d9be 100644
--- a/libm-test/benches/icount.rs
+++ b/libm-test/benches/icount.rs
@@ -1,10 +1,10 @@
-//! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable.
+//! Instruction-count benchmarks run via `gungraun` so that results are reasonably CI-stable.
 #![feature(f16)]
 #![feature(f128)]
 
 use std::hint::black_box;
 
-use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use gungraun::{library_benchmark, library_benchmark_group, main};
 use libm::support::{HInt, Hexf, hf16, hf32, hf64, hf128, u256};
 use libm_test::generate::spaced;
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
@@ -156,7 +156,13 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
 
 library_benchmark_group!(
     name = icount_bench_u128_group;
-    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_narrowing_div, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
+    benchmarks =
+    icount_bench_u128_widen_mul,
+    icount_bench_u256_narrowing_div,
+    icount_bench_u256_add,
+    icount_bench_u256_sub,
+    icount_bench_u256_shl,
+    icount_bench_u256_shr
 );
 
 #[library_benchmark]