Skip to content

Commit

Permalink
Enable tests previously disabled due to an aliasing bug (#2005)
Browse files: browse the repository at this point in the history
* Enable tests previously disabled due to an aliasing bug

The bug was fixed by #1792.
  • Loading branch information
naoyam authored Sep 28, 2022
1 parent 967aa77 commit 45045cd
Showing 1 changed file with 13 additions and 11 deletions.
24 changes: 13 additions & 11 deletions torch/csrc/jit/codegen/cuda/test/test_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9413,7 +9413,7 @@ TEST_F(NVFuserTest, FusionMagicSchedulerInstanceNormalizationBackward_CUDA) {
"");
}

TEST_F(NVFuserTest, FusionPersistentSoftmaxLocalSmem_CUDA) {
TEST_F(NVFuserTest, FusionPersistentSoftmaxLocalShared_CUDA) {
Fusion fusion;
FusionGuard fg(&fusion);

Expand Down Expand Up @@ -9519,10 +9519,11 @@ TEST_F(NVFuserTest, FusionPersistentSoftmaxLocalSmem_CUDA) {
const int64_t dimy = 16384;

auto properties = at::cuda::getDeviceProperties(0);
// Require 70KB of smem to run test
const size_t required_smem_size = 70 << 10;
const size_t required_smem_size =
(dimy - static_size) * sizeof(float) + TIDX * sizeof(float);
if (properties->sharedMemPerBlockOptin < required_smem_size) {
GTEST_SKIP() << "not enough shared memory space on device to run test";
GTEST_SKIP() << "not enough shared memory space on device to run test: "
<< properties->sharedMemPerBlock;
}

auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
Expand Down Expand Up @@ -9708,6 +9709,14 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
const float kEps = 1e-5;
auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);

auto properties = at::cuda::getDeviceProperties(0);
const size_t required_smem_size =
(dimy - static_size) * sizeof(float) + TIDX * sizeof(float);
if (properties->sharedMemPerBlockOptin < required_smem_size) {
GTEST_SKIP() << "not enough shared memory space on device to run test: "
<< properties->sharedMemPerBlock;
}

at::Tensor aten_input = at::randn({dimx, dimy}, options);
at::Tensor aten_static_in = aten_input.narrow(1, 0, static_size);
at::Tensor aten_dynamic_in =
Expand All @@ -9723,13 +9732,6 @@ TEST_F(NVFuserTest, FusionPersistentNormLocalShared_CUDA) {
torch::jit::fuser::cuda::FusionExecutor fe;
fe.compileFusion(&fusion, aten_inputs);

auto properties = at::cuda::getDeviceProperties(0);
// Require 70KB of smem to run test
const size_t required_smem_size = 70 << 10;
if (properties->sharedMemPerBlockOptin < required_smem_size) {
GTEST_SKIP() << "not enough shared memory space on device to run test";
}

fe.runFusion(aten_inputs, {cg_static_out, cg_dynamic_out});

auto at_mu = at::mean(aten_input.to(at::kDouble), -1).unsqueeze(1);
Expand Down

0 comments on commit 45045cd

Please sign in to comment.