Test util cleanup (#2003)
Don't clear the memory allocator cache as it shouldn't be necessary
naoyam authored Sep 28, 2022
1 parent 3ca21eb commit dc45835
Showing 2 changed files with 56 additions and 52 deletions.
57 changes: 7 additions & 50 deletions torch/csrc/jit/codegen/cuda/test/test_gpu_validator.h
@@ -1,49 +1,22 @@
#include <torch/csrc/jit/codegen/cuda/executor.h>
#pragma once

#include <torch/csrc/jit/codegen/cuda/executor_utils.h>
#include <torch/csrc/jit/codegen/cuda/expr_evaluator.h>
#include <torch/csrc/jit/codegen/cuda/fusion.h>
#include <torch/csrc/jit/codegen/cuda/ir_iostream.h>
#include <torch/csrc/jit/codegen/cuda/lower_utils.h>

#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <torch/torch.h>

#include <unordered_map>

// Tests go in torch::jit
namespace torch {
namespace jit {
namespace fuser {
namespace cuda {

inline bool deviceMajorMinorCheck(int major, int minor = 0) {
auto dev_prop = at::cuda::getCurrentDeviceProperties();
if (dev_prop->major < major ||
(dev_prop->major == major && dev_prop->minor < minor)) {
return false;
}
return true;
}

inline int deviceSMCount() {
int sm_count = at::cuda::getCurrentDeviceProperties()->multiProcessorCount;
return sm_count;
}
using namespace torch::jit::fuser::cuda;

class NVFuserTest : public ::testing::Test {
protected:
void SetUp() override {
// requires PASCAL or newer
if (!deviceMajorMinorCheck(6)) {
GTEST_SKIP() << "skipping tests on pre-PASCAL GPUs";
}
setFillAllocationWithNan(true);
}

void TearDown() override {
c10::cuda::CUDACachingAllocator::emptyCache();
}
};
namespace {

struct ValidationConstants {
// Tolerances generated from randn + add + sum fusion
@@ -74,8 +47,6 @@ struct ValidationConstants {
double base_float_rel_tol = -1;
};

namespace {

// Returns abs and relative values to use for validation
std::pair<double, double> getTolerance(
DataType dtype,
@@ -338,15 +309,13 @@ ExpressionEvaluator bindInputsAndLaunchParams(
return expr_eval;
}

} // namespace

// Validation will look through the fusion and figure out how many elements were
// reduced to create each output. It will then compute a tolerance to use for
// allclose based on experimental results. The experimental results were based
// on adding two tensors then summing them. This of course has an assumption
// that we're always summing values between -2 and 2. If we start summing values
// larger than that, this approach might not hold.
inline void testValidate(
void testValidate(
Fusion* fusion,
const std::vector<at::Tensor>& fusion_outputs,
const at::ArrayRef<IValue>& aten_inputs,
@@ -466,18 +435,6 @@ inline void testValidate(
}
}

inline void clearL2Cache() {
torch::NoGradGuard no_grad;
auto l2_cache_size = at::cuda::getCurrentDeviceProperties()->l2CacheSize;
auto options =
torch::TensorOptions().dtype(torch::kFloat32).device(at::kCUDA, 0);

auto l2_elems = l2_cache_size / 4;
torch::Tensor t0 = torch::empty(l2_elems, options);
torch::Tensor t1 = torch::clone(t0);
}

} // namespace cuda
} // namespace fuser
} // namespace
} // namespace jit
} // namespace torch
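
For context, testValidate is typically the last call in an nvfuser codegen test: it runs allclose between the fusion outputs and an ATen reference, with the tolerance derived from the reduction sizes as described in the comment above. A minimal caller sketch follows; the fusion, shapes, and test name are illustrative assumptions, not part of this commit.

// Hypothetical caller sketch, assuming the usual nvfuser test helpers.
TEST_F(NVFuserTest, FusionAddSumSketch_CUDA) {
  Fusion fusion;
  FusionGuard fg(&fusion);

  // Build a small add + sum fusion, mirroring the pattern the
  // validation tolerances were calibrated on.
  auto tv0 = makeSymbolicTensor(2);
  fusion.addInput(tv0);
  auto tv1 = add(tv0, IrBuilder::create<Double>(1.0));
  auto tv2 = sum(tv1, {1});
  fusion.addOutput(tv2);

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor t0 = at::randn({64, 128}, options);

  FusionExecutor fe;
  fe.compileFusion(&fusion, {t0});
  auto cg_outputs = fe.runFusion({t0});

  auto ref = (t0 + 1.0).sum(1);
  // testValidate derives an allclose tolerance from how many elements
  // were reduced to produce each output.
  testValidate(&fusion, cg_outputs, {t0}, {ref}, __LINE__, __FILE__);
}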
51 changes: 49 additions & 2 deletions torch/csrc/jit/codegen/cuda/test/test_utils.h
@@ -1,9 +1,17 @@
#pragma once

#include <cstddef>

#include <torch/csrc/jit/codegen/cuda/executor.h>
#include <torch/csrc/jit/codegen/cuda/expr_evaluator.h>
#include <torch/csrc/jit/codegen/cuda/ir_all_nodes.h>

#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDACachingAllocator.h>
#include <torch/torch.h>

#include <gtest/gtest.h>

#include <cstddef>

// Tests go in torch::jit
namespace torch {
namespace jit {
@@ -84,6 +92,45 @@ int64_t prime_numbers[] = {
1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,
1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223};

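// Returns true when the current device's compute capability is at least
// major.minor (e.g., deviceMajorMinorCheck(6) accepts Pascal and newer).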
bool deviceMajorMinorCheck(int major, int minor = 0) {
auto dev_prop = at::cuda::getCurrentDeviceProperties();
if (dev_prop->major < major ||
(dev_prop->major == major && dev_prop->minor < minor)) {
return false;
}
return true;
}

int deviceSMCount() {
int sm_count = at::cuda::getCurrentDeviceProperties()->multiProcessorCount;
return sm_count;
}

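// Evicts the GPU L2 cache by streaming an L2-sized float tensor through a
// clone, so a subsequent timing run does not see data cached by a previous
// kernel.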
void clearL2Cache() {
torch::NoGradGuard no_grad;
auto l2_cache_size = at::cuda::getCurrentDeviceProperties()->l2CacheSize;
auto options =
torch::TensorOptions().dtype(torch::kFloat32).device(at::kCUDA, 0);

auto l2_elems = l2_cache_size / 4;
torch::Tensor t0 = torch::empty(l2_elems, options);
torch::Tensor t1 = torch::clone(t0);
}

} // namespace

// Fixture class must be uniquely identified, i.e., can't be in an
// anonymous namespace
class NVFuserTest : public ::testing::Test {
protected:
void SetUp() override {
// requires PASCAL or newer
if (!deviceMajorMinorCheck(6)) {
GTEST_SKIP() << "skipping tests on pre-PASCAL GPUs";
}
setFillAllocationWithNan(true);
}
};

} // namespace jit
} // namespace torch
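
With the helpers consolidated in test_utils.h, a test that wants cold-cache measurements on a minimum architecture might look like the following sketch; the test name, shapes, and body are illustrative assumptions. Note that after this cleanup the fixture no longer overrides TearDown: per the commit message, clearing the caching allocator between tests shouldn't be necessary.

// Hypothetical usage sketch; not part of this commit.
TEST_F(NVFuserTest, FusionColdL2Sketch_CUDA) {
  // NVFuserTest::SetUp has already skipped pre-Pascal devices and enabled
  // NaN-filling of allocations; tests can impose stricter requirements.
  if (!deviceMajorMinorCheck(7)) {
    GTEST_SKIP() << "requires Volta or newer";
  }

  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
  at::Tensor t0 = at::randn({1024, 1024}, options);

  for (int i = 0; i < 5; ++i) {
    clearL2Cache(); // make each iteration start with a cold L2
    auto out = t0.sum(1); // stand-in for running a compiled fusion
  }
}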
