From 0dab160fb2177d178eef3148c6a529e0855009e9 Mon Sep 17 00:00:00 2001 From: Christian Sarofeen Date: Sat, 27 Aug 2022 10:28:01 -0400 Subject: [PATCH] Fix softmax bwd sizes. (#1890) --- benchmarks/cpp/nvfuser/softmax_backward.cpp | 32 ++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/benchmarks/cpp/nvfuser/softmax_backward.cpp b/benchmarks/cpp/nvfuser/softmax_backward.cpp index 8fb35083c6dc7..be8dfda275d0e 100644 --- a/benchmarks/cpp/nvfuser/softmax_backward.cpp +++ b/benchmarks/cpp/nvfuser/softmax_backward.cpp @@ -177,13 +177,13 @@ NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Outer_fp32) NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Outer_fp32) // ->RangeMultiplier(2) - ->Ranges({{32768, 32 * 1024 * 1024}, {2, 16}}) + ->Ranges({{32768, 16 * 1024 * 1024}, {2, 16}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Outer_fp32) // ->RangeMultiplier(2) - ->Ranges({{2, 16}, {32768, 32 * 1024 * 1024}}) + ->Ranges({{2, 16}, {32768, 16 * 1024 * 1024}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); @@ -201,13 +201,13 @@ NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Outer_fp16) NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Outer_fp16) // ->RangeMultiplier(2) - ->Ranges({{32768, 32 * 1024 * 1024}, {2, 16}}) + ->Ranges({{32768, 16 * 1024 * 1024}, {2, 16}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Outer_fp16) // ->RangeMultiplier(2) - ->Ranges({{2, 16}, {32768, 32 * 1024 * 1024}}) + ->Ranges({{2, 16}, {32768, 16 * 1024 * 1024}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); @@ -225,13 +225,13 @@ NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Inner_fp32) NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Inner_fp32) // ->RangeMultiplier(2) - ->Ranges({{32768, 32 * 1024 * 1024}, {2, 16}}) + ->Ranges({{32768, 16 * 1024 * 1024}, {2, 16}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Inner_fp32) // ->RangeMultiplier(2) - ->Ranges({{2, 16}, {32768, 32 * 1024 * 1024}}) + ->Ranges({{2, 16}, {32768, 16 * 1024 * 1024}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); @@ -249,13 +249,13 @@ NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Inner_fp16) NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Inner_fp16) // ->RangeMultiplier(2) - ->Ranges({{32768, 32 * 1024 * 1024}, {2, 16}}) + ->Ranges({{32768, 16 * 1024 * 1024}, {2, 16}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); NVFUSER_BENCHMARK_RUN(NvFuserScheduler_Softmax_BWD_Inner_fp16) // ->RangeMultiplier(2) - ->Ranges({{2, 16}, {32768, 32 * 1024 * 1024}}) + ->Ranges({{2, 16}, {32768, 16 * 1024 * 1024}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); @@ -275,13 +275,13 @@ BENCHMARK(Baseline_Softmax_BWD_Outer_fp32) BENCHMARK(Baseline_Softmax_BWD_Outer_fp32) // ->RangeMultiplier(2) - ->Ranges({{32768, 32 * 1024 * 1024}, {2, 16}}) + ->Ranges({{32768, 16 * 1024 * 1024}, {2, 16}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); BENCHMARK(Baseline_Softmax_BWD_Outer_fp32) // ->RangeMultiplier(2) - ->Ranges({{2, 16}, {32768, 32 * 1024 * 1024}}) + ->Ranges({{2, 16}, {32768, 16 * 1024 * 1024}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); @@ -299,13 +299,13 @@ BENCHMARK(Baseline_Softmax_BWD_Outer_fp16) BENCHMARK(Baseline_Softmax_BWD_Outer_fp16) // ->RangeMultiplier(2) - ->Ranges({{32768, 32 * 1024 * 1024}, {2, 16}}) + ->Ranges({{32768, 16 * 1024 * 1024}, {2, 16}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); BENCHMARK(Baseline_Softmax_BWD_Outer_fp16) // ->RangeMultiplier(2) - ->Ranges({{2, 16}, {32768, 32 * 1024 * 1024}}) + ->Ranges({{2, 16}, {32768, 16 * 1024 * 1024}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); @@ -323,13 +323,13 @@ BENCHMARK(Baseline_Softmax_BWD_Inner_fp32) BENCHMARK(Baseline_Softmax_BWD_Inner_fp32) // ->RangeMultiplier(2) - ->Ranges({{32768, 32 * 1024 * 1024}, {2, 16}}) + ->Ranges({{32768, 16 * 1024 * 1024}, {2, 16}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); BENCHMARK(Baseline_Softmax_BWD_Inner_fp32) // ->RangeMultiplier(2) - ->Ranges({{2, 16}, {32768, 32 * 1024 * 1024}}) + ->Ranges({{2, 16}, {32768, 16 * 1024 * 1024}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); @@ -347,13 +347,13 @@ BENCHMARK(Baseline_Softmax_BWD_Inner_fp16) BENCHMARK(Baseline_Softmax_BWD_Inner_fp16) // ->RangeMultiplier(2) - ->Ranges({{32768, 32 * 1024 * 1024}, {2, 16}}) + ->Ranges({{32768, 16 * 1024 * 1024}, {2, 16}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime(); BENCHMARK(Baseline_Softmax_BWD_Inner_fp16) // ->RangeMultiplier(2) - ->Ranges({{2, 16}, {32768, 32 * 1024 * 1024}}) + ->Ranges({{2, 16}, {32768, 16 * 1024 * 1024}}) ->Unit(benchmark::kMicrosecond) ->UseManualTime();