Skip to content

Commit

Permalink
Don't allocate arrays during set-up; it introduces noise.
Browse files Browse the repository at this point in the history
  • Loading branch information
maleadt committed Sep 20, 2023
1 parent 09ea267 commit dedaad6
Showing 1 changed file with 4 additions and 9 deletions.
13 changes: 4 additions & 9 deletions benchmarks/blas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,10 @@ function blas_benchmark(group, a_type, b_type, cd_type, N, M=N, K=N; alpha=true,

# NOTE: we use `cuStreamSynchronize` instead of `synchronize` to avoid
# influence from the Julia scheduler
- group[name] = @benchmarkable(
- begin
- GemmKernels.matmatmul!(c, $a_layout, $b_layout, a, b, $alpha, $beta; $(kwargs)...)
- CUDA.cuStreamSynchronize(stream())
- end,
- setup=(a=CuArray($a_h); b=CuArray($b_h); c=CuArray($c_h);
- CUDA.cuStreamSynchronize(stream())),
- teardown=(CUDA.unsafe_free!(a); CUDA.unsafe_free!(b); CUDA.unsafe_free!(c))
- )
+ group[name] = @benchmarkable begin
+ GemmKernels.matmatmul!($c, $a_layout, $b_layout, $a, $b, $alpha, $beta; $(kwargs)...)
+ CUDA.cuStreamSynchronize(stream())
+ end
end

let group = addgroup!(group, "WMMA")
Expand Down

0 comments on commit dedaad6

Please sign in to comment.