Introduce hooks to allow more customisable benchmarking

Zentrik committed Jun 20, 2024
1 parent 87081e6 commit fd2f88a
Showing 12 changed files with 368 additions and 288 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/CI.yml
@@ -51,8 +51,6 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
-        env:
-          TEST_PERF_INTEGRATION: false
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v3
         with:
2 changes: 0 additions & 2 deletions Project.toml
@@ -5,7 +5,6 @@ version = "1.6.0"
 [deps]
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
@@ -17,7 +16,6 @@ Aqua = "0.8"
 Compat = ">= 4.11.0"
 JSON = "0.18, 0.19, 0.20, 0.21"
 JuliaFormatter = "1"
-LinuxPerf = "0.3.8"
 Logging = "<0.0.1, 1"
 Printf = "<0.0.1, 1"
 Profile = "<0.0.1, 1"
5 changes: 0 additions & 5 deletions docs/src/manual.md
@@ -85,11 +85,6 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`,
 - `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gcsample = false`.
 - `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
 - `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
-- `enable_linux_perf`: If `true`, profile using perf `evals` times. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = false`.
-- `linux_perf_groups`: The event groups you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = "(instructions,branch-instructions)"`.
-- `linux_perf_spaces`: Which of user, kernel, and hypervisor space you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = (true, false, false)`.
-- `linux_perf_threads`: If `true`, all threads are profiled, else only the thread which starts the benchmark is profiled, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = true`.
-- `linux_perf_gcscrub`: If `true`, run `gc()` before executing the profiling run. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = true`.
 
 To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS`, for example:
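The removed options are superseded rather than dropped: this commit replaces the hard-wired LinuxPerf path with user-supplied hooks (`setup_prehook`, `prehook`, `posthook`, `sample_result`, `teardown_posthook`, plus the `enable_customisable_func`, `run_customisable_func_only`, and `customisable_gcsample` switches used in the `src/execution.jl` diff below). As a hedged sketch, the old perf measurement could be rebuilt on top of the hooks like this, assuming the new `Parameters` fields accept these keywords (`parameters.jl` is among the changed files not rendered here) and using only the LinuxPerf.jl calls that appear elsewhere in this commit:

```julia
using BenchmarkTools, LinuxPerf

b = @benchmarkable sort(rand(1000))
trial = run(
    b;
    enable_customisable_func = :LAST,
    customisable_gcsample = true,  # plays the role of the removed linux_perf_gcscrub
    # Create the perf bench before setup; its value is threaded through to the later hooks.
    setup_prehook = _ -> LinuxPerf.make_bench_threaded(
        LinuxPerf.parse_groups("(instructions,branch-instructions)")
    ),
    prehook = LinuxPerf.enable_all!,    # start counting just before the evals loop
    posthook = LinuxPerf.disable_all!,  # stop counting right after it
    sample_result = (params, bench, _, _) -> LinuxPerf.Stats(bench),
    teardown_posthook = (params, bench) -> close(bench),
)
trial.customisable_result  # LinuxPerf.Stats of the final hooked run
```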
2 changes: 0 additions & 2 deletions src/BenchmarkTools.jl
@@ -10,8 +10,6 @@ using Printf
 using Profile
 using Compat: pkgversion, @noinline
 
-using LinuxPerf: LinuxPerf
-
 ##############
 # Parameters #
 ##############
152 changes: 100 additions & 52 deletions src/execution.jl
@@ -16,7 +16,7 @@ end
 
 mutable struct Benchmark
     samplefunc
-    linux_perf_func
+    customisable_func
     quote_vals
     params::Parameters
 end
@@ -110,25 +110,56 @@ end
 function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, kwargs...)
     params = Parameters(p; kwargs...)
     @assert params.seconds > 0.0 "time limit must be greater than 0.0"
-    if warmup
-        b.samplefunc(b.quote_vals, Parameters(params; evals=1)) #warmup sample
+    @assert params.enable_customisable_func in (:FALSE, :ALL, :LAST) "invalid value $(params.enable_customisable_func) for enable_customisable_func which must be :FALSE, :ALL or :LAST"
+    @assert !(
+        params.run_customisable_func_only && params.enable_customisable_func == :FALSE
+    ) "run_customisable_func_only is set to true, but enable_customisable_func is set to :FALSE"
+    if warmup #warmup sample
+        params.run_customisable_func_only &&
+            b.customisable_func(b.quote_vals, Parameters(params; evals=1))
+        !params.run_customisable_func_only &&
+            b.samplefunc(b.quote_vals, Parameters(params; evals=1))
     end
     trial = Trial(params)
+    if params.enable_customisable_func == :ALL
+        trial.customisable_result = []
+        trial.customisable_result_for_every_sample = true
+    end
     params.gctrial && gcscrub()
     start_time = Base.time()
-    s = b.samplefunc(b.quote_vals, params)
-    push!(trial, s[1:(end - 1)]...)
-    return_val = s[end]
+
+    return_val = nothing
+    if !params.run_customisable_func_only
+        s = b.samplefunc(b.quote_vals, params)
+        push!(trial, s[1:(end - 1)]...)
+        return_val = s[end]
+    end
+    if params.enable_customisable_func == :ALL
+        params.customisable_gcsample && gcscrub()
+        push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1])
+    end
 
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
         push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
+
+        if params.enable_customisable_func == :ALL
+            params.customisable_gcsample && gcscrub()
+            push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1])
+        end
+
         iters += 1
     end
 
-    if params.enable_linux_perf
-        params.linux_perf_gcscrub && gcscrub()
-        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    if params.enable_customisable_func !== :FALSE
+        params.customisable_gcsample && gcscrub()
+        s = b.customisable_func(b.quote_vals, params)
+        trial.customisable_result = s[1]
+
+        if params.run_customisable_func_only
+            return_val = s[end]
+        end
     end
 
     return trial, return_val
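An assumed reading of the `_run` logic above (not text from the commit): `:ALL` pushes one hook result per sample into `trial.customisable_result`, any setting other than `:FALSE` also triggers one final hooked run after the timing loop, and with `run_customisable_func_only = true` that final run supplies the benchmark's return value. A minimal timing hook, with `elapsed` as a hypothetical `sample_result`:

```julia
using BenchmarkTools

# Hypothetical sample_result: nanoseconds between the two hook timestamps.
elapsed(params, setup_result, pre, post) = post - pre

b = @benchmarkable sum(rand(100))
t = run(b; enable_customisable_func = :LAST,
        prehook = time_ns, posthook = time_ns, sample_result = elapsed)
t.customisable_result  # elapsed ns of the single post-loop hooked run
```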
@@ -513,6 +544,24 @@ macro benchmarkable(args...)
     end
 end
 
+samplefunc_prehook() = (Base.gc_num(), time_ns())
+samplefunc_posthook = samplefunc_prehook
+function samplefunc_sample_result(__params, _, prehook_result, posthook_result)
+    __evals = __params.evals
+    __sample_time = posthook_result[2] - prehook_result[2]
+    __gcdiff = Base.GC_Diff(posthook_result[1], prehook_result[1])
+
+    __time = max((__sample_time / __evals) - __params.overhead, 0.001)
+    __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
+    __memory = Int(Base.fld(__gcdiff.allocd, __evals))
+    __allocs = Int(
+        Base.fld(
+            __gcdiff.malloc + __gcdiff.realloc + __gcdiff.poolalloc + __gcdiff.bigalloc,
+            __evals,
+        ),
+    )
+    return __time, __gctime, __memory, __allocs
+end
 # `eval` an expression that forcibly defines the specified benchmark at
 # top-level in order to allow transfer of locally-scoped variables into
 # benchmark scope.
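A rough illustration (hypothetical, not from the diff) of the arithmetic in `samplefunc_sample_result`: both hook results are `(gc counters, timestamp)` tuples, every statistic is averaged over `evals`, and the two time figures additionally subtract `__params.overhead`:

```julia
function demo_sample(evals::Int=100)
    pre = (Base.gc_num(), time_ns())    # what samplefunc_prehook returns
    acc = 0.0
    for _ in 1:evals                    # stand-in for the generated evals loop
        acc += sum(rand(10))
    end
    post = (Base.gc_num(), time_ns())   # what samplefunc_posthook returns

    time_per_eval = (post[2] - pre[2]) / evals             # mirrors __sample_time / __evals
    gcdiff = Base.GC_Diff(post[1], pre[1])
    memory_per_eval = Int(Base.fld(gcdiff.allocd, evals))  # mirrors __memory
    return time_per_eval, memory_per_eval, acc
end
```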
Expand All @@ -526,7 +575,7 @@ function generate_benchmark_definition(
@nospecialize
corefunc = gensym("core")
samplefunc = gensym("sample")
linux_perf_func = gensym("perf")
customisable_func = gensym("customisable")
type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
signature_def = Expr(
@@ -570,71 +619,70 @@
         @noinline function $(samplefunc)(
             $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
         )
-            $(setup)
-            __evals = __params.evals
-            __gc_start = Base.gc_num()
-            __start_time = time_ns()
-            __return_val = $(invocation)
-            for __iter in 2:__evals
-                $(invocation)
-            end
-            __sample_time = time_ns() - __start_time
-            __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
-            $(teardown)
-            __time = max((__sample_time / __evals) - __params.overhead, 0.001)
-            __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
-            __memory = Int(Base.fld(__gcdiff.allocd, __evals))
-            __allocs = Int(
-                Base.fld(
-                    __gcdiff.malloc +
-                    __gcdiff.realloc +
-                    __gcdiff.poolalloc +
-                    __gcdiff.bigalloc,
-                    __evals,
-                ),
-            )
-            return __time, __gctime, __memory, __allocs, __return_val
+            $BenchmarkTools.@noinline $(setup)
+            # Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation
+            # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions
+            __prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline (
+                function (__evals)
+                    prehook_result = $BenchmarkTools.samplefunc_prehook()
+                    # We'll run it evals times.
+                    $BenchmarkTools.@noinline __return_val_2 = $(invocation)
+                    for __iter in 2:__evals
+                        $BenchmarkTools.@noinline $(invocation)
+                    end
+                    posthook_result = $BenchmarkTools.samplefunc_posthook()
+                    # trick the compiler not to eliminate the code
+                    return prehook_result, posthook_result, __return_val_2
+                end
+            )(
+                __params.evals
+            )
+            $(teardown)
+            return $BenchmarkTools.samplefunc_sample_result(
+                __params, nothing, __prehook_result, __posthook_result
+            )...,
+            __return_val
         end
-        @noinline function $(linux_perf_func)(
+        @noinline function $(customisable_func)(
             $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
         )
-            # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
-            __linux_perf_groups = $LinuxPerf.set_default_spaces(
-                $LinuxPerf.parse_groups(__params.linux_perf_groups),
-                __params.linux_perf_spaces,
-            )
-            __linux_perf_bench = $LinuxPerf.make_bench_threaded(
-                __linux_perf_groups; threads=__params.linux_perf_threads
-            )
-
+            local __setup_prehook_result
             try
-                $(setup)
-                __evals = __params.evals
+                __setup_prehook_result = $BenchmarkTools.@noinline __params.setup_prehook(
+                    __params
+                )
+                $BenchmarkTools.@noinline $(setup)
                 # Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation
                 # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions
-                $BenchmarkTools.@noinline (
+                __prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline (
                     function (__evals)
-                        $LinuxPerf.enable_all!()
+                        prehook_result = __params.prehook()
                         # We'll run it evals times.
                         $BenchmarkTools.@noinline __return_val_2 = $(invocation)
                         for __iter in 2:__evals
                             $BenchmarkTools.@noinline $(invocation)
                         end
-                        $LinuxPerf.disable_all!()
+                        posthook_result = __params.posthook()
                         # trick the compiler not to eliminate the code
-                        return __return_val_2
+                        return prehook_result, posthook_result, __return_val_2
                     end
                 )(
-                    __evals
+                    __params.evals
                 )
-                return $LinuxPerf.Stats(__linux_perf_bench)
+                return __params.sample_result(
+                    __params,
+                    __setup_prehook_result,
+                    __prehook_result,
+                    __posthook_result,
+                ),
+                __return_val
             finally
-                close(__linux_perf_bench)
                 $(teardown)
+                __params.teardown_posthook(__params, __setup_prehook_result)
             end
         end
         $BenchmarkTools.Benchmark(
-            $(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
+            $(samplefunc), $(customisable_func), $(quote_vals), $(params)
         )
     end,
 )
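Finally, a hedged end-to-end sketch (again assuming the new `Parameters` keywords plumb through `run`) that makes the hook call order enforced by the generated `customisable_func` visible; expect one `setup_prehook → prehook → posthook → sample_result → teardown_posthook` sequence per hooked run, warmup included:

```julia
using BenchmarkTools

order = String[]
b = @benchmarkable sum(rand(10))
run(b;
    enable_customisable_func = :LAST,
    run_customisable_func_only = true,
    setup_prehook = p -> push!(order, "setup_prehook"),
    prehook = () -> push!(order, "prehook"),
    posthook = () -> push!(order, "posthook"),
    sample_result = (p, s, pre, post) -> push!(order, "sample_result"),
    teardown_posthook = (p, s) -> push!(order, "teardown_posthook"),
)
order
```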
(Diffs for the remaining changed files are not rendered here.)