From bee6c69979d8c34d6d6ef7514f8886cf1416d64f Mon Sep 17 00:00:00 2001
From: Naoya Maruyama
Date: Wed, 13 Jul 2022 23:13:48 +0900
Subject: [PATCH] bug fix (#1819)

---
 torch/csrc/jit/codegen/cuda/test/test_gpu.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/torch/csrc/jit/codegen/cuda/test/test_gpu.cpp b/torch/csrc/jit/codegen/cuda/test/test_gpu.cpp
index 5226e1778b770..e16beb70302d2 100644
--- a/torch/csrc/jit/codegen/cuda/test/test_gpu.cpp
+++ b/torch/csrc/jit/codegen/cuda/test/test_gpu.cpp
@@ -9636,7 +9636,6 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
 
   torch::jit::fuser::cuda::FusionExecutor fe;
   fe.compileFusion(&fusion, aten_inputs);
-  fe.runFusion(aten_inputs, {cg_static_out, cg_dynamic_out});
 
   auto properties = at::cuda::getDeviceProperties(0);
   // Require 70KB of smem to run test
@@ -9645,6 +9644,8 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
     GTEST_SKIP() << "not enough shared memory space on device to run test";
   }
 
+  fe.runFusion(aten_inputs, {cg_static_out, cg_dynamic_out});
+
   auto at_mu = at::mean(aten_input.to(at::kDouble), -1).unsqueeze(1);
   auto at_var = at::var(aten_input.to(at::kDouble), -1, false).unsqueeze(1);
   auto at_rvar = at::rsqrt(at::add(at_var, kEps));