Skip to content

Commit

Permalink
bug fix (csarofeen#1819)
Browse files: browse the repository at this point in the history
  • Loading branch information
naoyam committed Jul 13, 2022
1 parent 4413c8f commit bee6c69
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion torch/csrc/jit/codegen/cuda/test/test_gpu.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -9636,7 +9636,6 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {

torch::jit::fuser::cuda::FusionExecutor fe;
fe.compileFusion(&fusion, aten_inputs);
fe.runFusion(aten_inputs, {cg_static_out, cg_dynamic_out});

auto properties = at::cuda::getDeviceProperties(0);
// Require 70KB of smem to run test
Expand All @@ -9645,6 +9644,8 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
GTEST_SKIP() << "not enough shared memory space on device to run test";
}

fe.runFusion(aten_inputs, {cg_static_out, cg_dynamic_out});

auto at_mu = at::mean(aten_input.to(at::kDouble), -1).unsqueeze(1);
auto at_var = at::var(aten_input.to(at::kDouble), -1, false).unsqueeze(1);
auto at_rvar = at::rsqrt(at::add(at_var, kEps));
Expand Down

0 comments on commit bee6c69

Please sign in to comment.