Introduce hooks to allow more customisable benchmarking

Zentrik committed Jun 20, 2024
1 parent 87081e6 commit fd2f88a
Showing 12 changed files with 368 additions and 288 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/CI.yml
@@ -51,8 +51,6 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
-        env:
-          TEST_PERF_INTEGRATION: false
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v3
         with:
2 changes: 0 additions & 2 deletions Project.toml
@@ -5,7 +5,6 @@ version = "1.6.0"
 [deps]
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
@@ -17,7 +16,6 @@ Aqua = "0.8"
 Compat = ">= 4.11.0"
 JSON = "0.18, 0.19, 0.20, 0.21"
 JuliaFormatter = "1"
-LinuxPerf = "0.3.8"
 Logging = "<0.0.1, 1"
 Printf = "<0.0.1, 1"
 Profile = "<0.0.1, 1"
5 changes: 0 additions & 5 deletions docs/src/manual.md
@@ -85,11 +85,6 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`,
 - `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gcsample = false`.
 - `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
 - `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
-- `enable_linux_perf`: If `true`, profile using perf `evals` times. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = false`.
-- `linux_perf_groups`: The event groups you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = "(instructions,branch-instructions)"`.
-- `linux_perf_spaces`: Which of user, kernel, and hypervisor space you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = (true, false, false)`.
-- `linux_perf_threads`: If `true`, all threads are profiled, else only the thread which starts the benchmark is profiled, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = true`.
-- `linux_perf_gcscrub`: If `true`, run `gc()` before executing the profiling run. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = true`.
 
 To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS`, for example:
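The removed options are superseded rather than dropped: this commit replaces the hard-wired LinuxPerf path with user-supplied hooks (`setup_prehook`, `prehook`, `posthook`, `sample_result`, `teardown_posthook`, plus the `enable_customisable_func`, `run_customisable_func_only`, and `customisable_gcsample` switches used in the `src/execution.jl` diff below). As a hedged sketch, the old perf measurement could be rebuilt on top of the hooks like this, assuming the new `Parameters` fields accept these keywords (`parameters.jl` is among the changed files not rendered here) and using only the LinuxPerf.jl calls that appear elsewhere in this commit:

```julia
using BenchmarkTools, LinuxPerf

b = @benchmarkable sort(rand(1000))
trial = run(
    b;
    enable_customisable_func = :LAST,
    customisable_gcsample = true,  # plays the role of the removed linux_perf_gcscrub
    # Create the perf bench before setup; its value is threaded through to the later hooks.
    setup_prehook = _ -> LinuxPerf.make_bench_threaded(
        LinuxPerf.parse_groups("(instructions,branch-instructions)")
    ),
    prehook = LinuxPerf.enable_all!,    # start counting just before the evals loop
    posthook = LinuxPerf.disable_all!,  # stop counting right after it
    sample_result = (params, bench, _, _) -> LinuxPerf.Stats(bench),
    teardown_posthook = (params, bench) -> close(bench),
)
trial.customisable_result  # LinuxPerf.Stats of the final hooked run
```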
2 changes: 0 additions & 2 deletions src/BenchmarkTools.jl
@@ -10,8 +10,6 @@ using Printf
 using Profile
 using Compat: pkgversion, @noinline
 
-using LinuxPerf: LinuxPerf
-
 ##############
 # Parameters #
 ##############
152 changes: 100 additions & 52 deletions src/execution.jl
@@ -16,7 +16,7 @@ end
 
 mutable struct Benchmark
     samplefunc
-    linux_perf_func
+    customisable_func
     quote_vals
     params::Parameters
 end
@@ -110,25 +110,56 @@ end
 function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, kwargs...)
     params = Parameters(p; kwargs...)
     @assert params.seconds > 0.0 "time limit must be greater than 0.0"
-    if warmup
-        b.samplefunc(b.quote_vals, Parameters(params; evals=1)) #warmup sample
+    @assert params.enable_customisable_func in (:FALSE, :ALL, :LAST) "invalid value $(params.enable_customisable_func) for enable_customisable_func which must be :FALSE, :ALL or :LAST"
+    @assert !(
+        params.run_customisable_func_only && params.enable_customisable_func == :FALSE
+    ) "run_customisable_func_only is set to true, but enable_customisable_func is set to :FALSE"
+    if warmup #warmup sample
+        params.run_customisable_func_only &&
+            b.customisable_func(b.quote_vals, Parameters(params; evals=1))
+        !params.run_customisable_func_only &&
+            b.samplefunc(b.quote_vals, Parameters(params; evals=1))
     end
     trial = Trial(params)
+    if params.enable_customisable_func == :ALL
+        trial.customisable_result = []
+        trial.customisable_result_for_every_sample = true
+    end
     params.gctrial && gcscrub()
     start_time = Base.time()
-    s = b.samplefunc(b.quote_vals, params)
-    push!(trial, s[1:(end - 1)]...)
-    return_val = s[end]
+
+    return_val = nothing
+    if !params.run_customisable_func_only
+        s = b.samplefunc(b.quote_vals, params)
+        push!(trial, s[1:(end - 1)]...)
+        return_val = s[end]
+    end
+    if params.enable_customisable_func == :ALL
+        params.customisable_gcsample && gcscrub()
+        push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1])
+    end
 
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
         push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
+
+        if params.enable_customisable_func == :ALL
+            params.customisable_gcsample && gcscrub()
+            push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1])
+        end
+
         iters += 1
     end
 
-    if params.enable_linux_perf
-        params.linux_perf_gcscrub && gcscrub()
-        trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params)
+    if params.enable_customisable_func !== :FALSE
+        params.customisable_gcsample && gcscrub()
+        s = b.customisable_func(b.quote_vals, params)
+        trial.customisable_result = s[1]
+
+        if params.run_customisable_func_only
+            return_val = s[end]
+        end
     end
 
     return trial, return_val
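An assumed reading of the `_run` logic above (not text from the commit): `:ALL` pushes one hook result per sample into `trial.customisable_result`, any setting other than `:FALSE` also triggers one final hooked run after the timing loop, and with `run_customisable_func_only = true` that final run supplies the benchmark's return value. A minimal timing hook, with `elapsed` as a hypothetical `sample_result`:

```julia
using BenchmarkTools

# Hypothetical sample_result: nanoseconds between the two hook timestamps.
elapsed(params, setup_result, pre, post) = post - pre

b = @benchmarkable sum(rand(100))
t = run(b; enable_customisable_func = :LAST,
        prehook = time_ns, posthook = time_ns, sample_result = elapsed)
t.customisable_result  # elapsed ns of the single post-loop hooked run
```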
@@ -513,6 +544,24 @@ macro benchmarkable(args...)
     end
 end
 
+samplefunc_prehook() = (Base.gc_num(), time_ns())
+samplefunc_posthook = samplefunc_prehook
+function samplefunc_sample_result(__params, _, prehook_result, posthook_result)
+    __evals = __params.evals
+    __sample_time = posthook_result[2] - prehook_result[2]
+    __gcdiff = Base.GC_Diff(posthook_result[1], prehook_result[1])
+
+    __time = max((__sample_time / __evals) - __params.overhead, 0.001)
+    __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
+    __memory = Int(Base.fld(__gcdiff.allocd, __evals))
+    __allocs = Int(
+        Base.fld(
+            __gcdiff.malloc + __gcdiff.realloc + __gcdiff.poolalloc + __gcdiff.bigalloc,
+            __evals,
+        ),
+    )
+    return __time, __gctime, __memory, __allocs
+end
 # `eval` an expression that forcibly defines the specified benchmark at
 # top-level in order to allow transfer of locally-scoped variables into
 # benchmark scope.
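A rough illustration (hypothetical, not from the diff) of the arithmetic in `samplefunc_sample_result`: both hook results are `(gc counters, timestamp)` tuples, every statistic is averaged over `evals`, and the two time figures additionally subtract `__params.overhead`:

```julia
function demo_sample(evals::Int=100)
    pre = (Base.gc_num(), time_ns())    # what samplefunc_prehook returns
    acc = 0.0
    for _ in 1:evals                    # stand-in for the generated evals loop
        acc += sum(rand(10))
    end
    post = (Base.gc_num(), time_ns())   # what samplefunc_posthook returns

    time_per_eval = (post[2] - pre[2]) / evals             # mirrors __sample_time / __evals
    gcdiff = Base.GC_Diff(post[1], pre[1])
    memory_per_eval = Int(Base.fld(gcdiff.allocd, evals))  # mirrors __memory
    return time_per_eval, memory_per_eval, acc
end
```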
Expand All @@ -526,7 +575,7 @@ function generate_benchmark_definition(
@nospecialize
corefunc = gensym("core")
samplefunc = gensym("sample")
linux_perf_func = gensym("perf")
customisable_func = gensym("customisable")
type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
signature_def = Expr(
@@ -570,71 +619,70 @@
         @noinline function $(samplefunc)(
             $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
         )
-            $(setup)
-            __evals = __params.evals
-            __gc_start = Base.gc_num()
-            __start_time = time_ns()
-            __return_val = $(invocation)
-            for __iter in 2:__evals
-                $(invocation)
-            end
-            __sample_time = time_ns() - __start_time
-            __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
-            $(teardown)
-            __time = max((__sample_time / __evals) - __params.overhead, 0.001)
-            __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
-            __memory = Int(Base.fld(__gcdiff.allocd, __evals))
-            __allocs = Int(
-                Base.fld(
-                    __gcdiff.malloc +
-                    __gcdiff.realloc +
-                    __gcdiff.poolalloc +
-                    __gcdiff.bigalloc,
-                    __evals,
-                ),
-            )
-            return __time, __gctime, __memory, __allocs, __return_val
+            $BenchmarkTools.@noinline $(setup)
+            # Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation
+            # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions
+            __prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline (
+                function (__evals)
+                    prehook_result = $BenchmarkTools.samplefunc_prehook()
+                    # We'll run it evals times.
+                    $BenchmarkTools.@noinline __return_val_2 = $(invocation)
+                    for __iter in 2:__evals
+                        $BenchmarkTools.@noinline $(invocation)
+                    end
+                    posthook_result = $BenchmarkTools.samplefunc_posthook()
+                    # trick the compiler not to eliminate the code
+                    return prehook_result, posthook_result, __return_val_2
+                end
+            )(
+                __params.evals
+            )
+            $(teardown)
+            return $BenchmarkTools.samplefunc_sample_result(
+                __params, nothing, __prehook_result, __posthook_result
+            )...,
+            __return_val
         end
-        @noinline function $(linux_perf_func)(
+        @noinline function $(customisable_func)(
             $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
         )
-            # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061
-            __linux_perf_groups = $LinuxPerf.set_default_spaces(
-                $LinuxPerf.parse_groups(__params.linux_perf_groups),
-                __params.linux_perf_spaces,
-            )
-            __linux_perf_bench = $LinuxPerf.make_bench_threaded(
-                __linux_perf_groups; threads=__params.linux_perf_threads
-            )
-
+            local __setup_prehook_result
             try
-                $(setup)
-                __evals = __params.evals
+                __setup_prehook_result = $BenchmarkTools.@noinline __params.setup_prehook(
+                    __params
+                )
+                $BenchmarkTools.@noinline $(setup)
                 # Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation
                 # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions
-                $BenchmarkTools.@noinline (
+                __prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline (
                     function (__evals)
-                        $LinuxPerf.enable_all!()
+                        prehook_result = __params.prehook()
                         # We'll run it evals times.
                         $BenchmarkTools.@noinline __return_val_2 = $(invocation)
                         for __iter in 2:__evals
                             $BenchmarkTools.@noinline $(invocation)
                         end
-                        $LinuxPerf.disable_all!()
+                        posthook_result = __params.posthook()
                         # trick the compiler not to eliminate the code
-                        return __return_val_2
+                        return prehook_result, posthook_result, __return_val_2
                     end
                 )(
-                    __evals
+                    __params.evals
                 )
-                return $LinuxPerf.Stats(__linux_perf_bench)
+                return __params.sample_result(
+                    __params,
+                    __setup_prehook_result,
+                    __prehook_result,
+                    __posthook_result,
+                ),
+                __return_val
             finally
-                close(__linux_perf_bench)
                 $(teardown)
+                __params.teardown_posthook(__params, __setup_prehook_result)
             end
         end
         $BenchmarkTools.Benchmark(
-            $(samplefunc), $(linux_perf_func), $(quote_vals), $(params)
+            $(samplefunc), $(customisable_func), $(quote_vals), $(params)
         )
     end,
 )
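Finally, a hedged end-to-end sketch (again assuming the new `Parameters` keywords plumb through `run`) that makes the hook call order enforced by the generated `customisable_func` visible; expect one `setup_prehook → prehook → posthook → sample_result → teardown_posthook` sequence per hooked run, warmup included:

```julia
using BenchmarkTools

order = String[]
b = @benchmarkable sum(rand(10))
run(b;
    enable_customisable_func = :LAST,
    run_customisable_func_only = true,
    setup_prehook = p -> push!(order, "setup_prehook"),
    prehook = () -> push!(order, "prehook"),
    posthook = () -> push!(order, "posthook"),
    sample_result = (p, s, pre, post) -> push!(order, "sample_result"),
    teardown_posthook = (p, s) -> push!(order, "teardown_posthook"),
)
order
```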
(Diffs for the remaining changed files are not rendered here.)