Fix multidevice tests #2574

Merged · 3 commits · Mar 13, 2023
third_party/nvfuser/test/test_gpu_multidevice.cpp (18 changes: 8 additions & 10 deletions)
@@ -55,7 +55,7 @@ using namespace at::indexing;
 // e.g.: mpirun -np 4 ./build/bin/nvfuser_tests
 // --gtest_filter=NVFuserTest.FusionMultiGPU_Reduce
 
-TEST_F(NVFuserTest, FusionMultiClusterProcessGroup) {
+TEST_F(NVFuserTest, FusionMultiClusterProcessGroup_CUDA) {
   int grank, gsize;
 
   if (parseEnv(grank, gsize)) {
@@ -71,7 +71,7 @@ TEST_F(NVFuserTest, FusionMultiClusterProcessGroup) {
   pg->barrier();
 }
 
-TEST_F(NVFuserTest, SendRecvTest) {
+TEST_F(NVFuserTest, SendRecvTest_CUDA) {
   // Using the new interface to build multi-cluster fusion
   MultiClusterFusion fusion;
   int grank, gsize;
@@ -119,7 +119,7 @@ TEST_F(NVFuserTest, SendRecvTest) {
   pg->barrier();
 }
 
-TEST_F(NVFuserTest, FusionMultiGPU) {
+TEST_F(NVFuserTest, FusionMultiGPU_CUDA) {
   // ===========================================================
   // FUSION
   // ===========================================================
@@ -183,6 +183,7 @@ TEST_F(NVFuserTest, FusionMultiGPU) {
         << "this test must be run with at least 2 GPUs, however there are "
         << number_of_gpus << " GPUs available";
   }
+  auto device = at::Device("cuda:" + std::to_string(grank));
 
   // ===========================================================
   // RUNTIME
@@ -194,9 +195,7 @@ TEST_F(NVFuserTest, FusionMultiGPU) {
 
   // Create input tensors. Each rank is binded to a different GPU
   c10::TensorOptions options;
-  options = at::TensorOptions()
-                .dtype(at::kFloat)
-                .device(at::Device("cuda:" + std::to_string(grank)));
+  options = at::TensorOptions().dtype(at::kFloat).device(device);
   at::Tensor input_tv = at::randn(
       {2, 8, 8}, options); // caveat: concrete values only used on rank 0
 
@@ -220,7 +219,7 @@ TEST_F(NVFuserTest, FusionMultiGPU) {
   pg->barrier();
 }
 
-TEST_F(NVFuserTest, FusionMultiGPU_Reduce) {
+TEST_F(NVFuserTest, FusionMultiGPU_Reduce_CUDA) {
   /*
   Test to be run on 4 ranks, each rank will be associated with a unique device
   and a unique cluster.
@@ -328,6 +327,7 @@ TEST_F(NVFuserTest, FusionMultiGPU_Reduce) {
         << "this test must be run with at least 4 GPUs, however there are "
        << number_of_gpus << " GPUs available";
   }
+  auto device = at::Device("cuda:" + std::to_string(grank));
 
   // ===========================================================
   // RUNTIME
@@ -338,9 +338,7 @@ TEST_F(NVFuserTest, FusionMultiGPU_Reduce) {
 
   // Create input tensors. Each rank is binded to a different GPU
   c10::TensorOptions options;
-  options = at::TensorOptions()
-                .dtype(at::kFloat)
-                .device(at::Device("cuda:" + std::to_string(grank)));
+  options = at::TensorOptions().dtype(at::kFloat).device(device);
   at::Tensor input_tv = at::randn(
       {2, 8, 8}, options); // caveat: concrete values only used on rank 0
 
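The recurring change in this file is to construct the rank-local CUDA device once and reuse it when building the tensor options. A minimal standalone sketch of that pattern follows (not code from this PR; makeRankLocalInput is a hypothetical helper, and rank stands in for the grank value that parseEnv would fill in):

    #include <ATen/ATen.h>
    #include <string>

    // Sketch only: bind the given rank to its CUDA device once, then reuse
    // that device for the tensor options instead of rebuilding the device
    // string inline each time.
    at::Tensor makeRankLocalInput(int rank) {
      auto device = at::Device("cuda:" + std::to_string(rank));
      auto options = at::TensorOptions().dtype(at::kFloat).device(device);
      // Same shape as the test input; concrete values only matter on rank 0.
      return at::randn({2, 8, 8}, options);
    }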
third_party/nvfuser/test/test_multicluster_fusion.cpp (3 changes: 1 addition & 2 deletions)
@@ -15,7 +15,7 @@ namespace nvfuser {
 
 using namespace at::indexing;
 
-TEST_F(NVFuserTest, MultiClusterFusion) {
+TEST_F(NVFuserTest, MultiClusterFusion_CUDA) {
   MultiClusterFusion fusion;
   FusionGuard fg(&fusion);
 
@@ -128,7 +128,6 @@ TEST_F(NVFuserTest, MultiClusterFusion) {
       "AggregateDag's outputs:{\n"
       " AggregateVal representing Val T6_g[ iS14{i3} ] on cluster 3\n"
       "}"};
-
   TORCH_INTERNAL_ASSERT(
       obtained_string_aDag == ref_string_aDag,
       "the obtained AggregateDag is not the one expected");