diff --git a/torch/csrc/jit/codegen/cuda/test/test_gpu_tensor_factories.cpp b/torch/csrc/jit/codegen/cuda/test/test_gpu_tensor_factories.cpp
index a1298e6621e0b..76c7b811db783 100644
--- a/torch/csrc/jit/codegen/cuda/test/test_gpu_tensor_factories.cpp
+++ b/torch/csrc/jit/codegen/cuda/test/test_gpu_tensor_factories.cpp
@@ -43,6 +43,9 @@ TEST_F(NVFuserTest, FusionStandaloneFull_CUDA) {
   fusion->addInput(fill_val2);
   fusion->addInput(fill_val3);
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     auto out_tv = full({size}, fill_val1, aten_to_data_type(dtype));
     fusion->addOutput(out_tv);
     out_tv = full({size, size}, fill_val2, aten_to_data_type(dtype));
@@ -57,6 +60,9 @@ TEST_F(NVFuserTest, FusionStandaloneFull_CUDA) {
   std::vector<at::Tensor> expect;
   expect.reserve(dtypes.size());
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     const auto options =
         at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
     expect.emplace_back(at::full({size}, 11, options));
@@ -94,6 +100,9 @@ TEST_F(NVFuserTest, FusionStandaloneZeros_CUDA) {
   Val* size = IrBuilder::create<Int>();
   fusion->addInput(size);
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     auto out_tv = zeros({size}, aten_to_data_type(dtype));
     fusion->addOutput(out_tv);
     out_tv = zeros({size, size}, aten_to_data_type(dtype));
@@ -108,6 +117,9 @@ TEST_F(NVFuserTest, FusionStandaloneZeros_CUDA) {
   std::vector<at::Tensor> expect;
   expect.reserve(dtypes.size());
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     const auto options =
         at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
     expect.emplace_back(at::zeros({size}, options));
@@ -145,6 +157,9 @@ TEST_F(NVFuserTest, FusionStandaloneOnes_CUDA) {
   Val* size = IrBuilder::create<Int>();
   fusion->addInput(size);
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     auto out_tv = ones({size}, aten_to_data_type(dtype));
     fusion->addOutput(out_tv);
     out_tv = ones({size, size}, aten_to_data_type(dtype));
@@ -159,6 +174,9 @@ TEST_F(NVFuserTest, FusionStandaloneOnes_CUDA) {
   std::vector<at::Tensor> expect;
   expect.reserve(dtypes.size());
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     const auto options =
         at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
     expect.emplace_back(at::ones({size}, options));
@@ -183,6 +201,10 @@ TEST_F(NVFuserTest, FusionStandaloneARange_CUDA) {
   auto dtypes = {kFloat, kLong, kDouble};
 
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
+
     auto fusion = std::make_unique<Fusion>();
     FusionGuard fg(fusion.get());
 
diff --git a/torch/csrc/jit/codegen/cuda/type.cpp b/torch/csrc/jit/codegen/cuda/type.cpp
index 1f6b3d32263cb..e5f6e6878aba5 100644
--- a/torch/csrc/jit/codegen/cuda/type.cpp
+++ b/torch/csrc/jit/codegen/cuda/type.cpp
@@ -1,5 +1,7 @@
 #include <torch/csrc/jit/codegen/cuda/type.h>
 
+#include <ATen/cuda/CUDAContext.h>
+
 #include <stdexcept>
 #include <unordered_map>
 
@@ -160,6 +162,17 @@ DataType getTypeFromComplexType(DataType dtype) {
   }
 }
 
+bool isSupportedTypeByDevice(DataType dtype) {
+  auto prop = at::cuda::getCurrentDeviceProperties();
+  auto major_ver = prop->major;
+  switch (dtype) {
+    case DataType::BFloat16:
+      return major_ver >= 8;
+    default:
+      return true;
+  }
+}
+
 bool isIntegerOp(const BinaryOpType bopt) {
   return bopt >= BinaryOpType::Mod && bopt <= BinaryOpType::Rshift;
 }
diff --git a/torch/csrc/jit/codegen/cuda/type.h b/torch/csrc/jit/codegen/cuda/type.h
index ea3291e9c3544..066e1921df3c1 100644
--- a/torch/csrc/jit/codegen/cuda/type.h
+++ b/torch/csrc/jit/codegen/cuda/type.h
@@ -101,6 +101,8 @@ int getVectorSizeFromType(DataType dtype);
 DataType getTypeFromVectorType(DataType dtype);
 // Return the corresponding scalar of a complex type
 DataType getTypeFromComplexType(DataType dtype);
+// Return whether the data type is supported on the current device
+TORCH_CUDA_CU_API bool isSupportedTypeByDevice(DataType dtype);
 
 enum class ExprType {
   Invalid,
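The new isSupportedTypeByDevice helper gates DataType::BFloat16 on the device's
compute capability: it returns true only when the device reports major >= 8
(Ampere or newer), so the factory-op tests above skip bf16 on older GPUs rather
than failing at kernel-compile time. Below is a minimal standalone sketch of
the same capability check, written against the plain CUDA runtime API instead
of nvfuser's at::cuda::getCurrentDeviceProperties() path; the
deviceSupportsBFloat16 helper name is hypothetical and not part of the patch.

// Standalone sketch (hypothetical helper, plain CUDA runtime API) of the
// capability check that isSupportedTypeByDevice performs for bf16.
#include <cuda_runtime.h>
#include <cstdio>

bool deviceSupportsBFloat16(int device) {
  cudaDeviceProp prop{};
  if (cudaGetDeviceProperties(&prop, device) != cudaSuccess) {
    return false; // conservatively report "unsupported" if the query fails
  }
  // BFloat16 is fully supported starting with compute capability 8.0 (Ampere).
  return prop.major >= 8;
}

int main() {
  int device = 0;
  cudaGetDevice(&device);
  std::printf("bf16 supported on device %d: %s\n",
              device,
              deviceSupportsBFloat16(device) ? "yes" : "no");
  return 0;
}

As in the patch, only the major version is consulted: any sm_8x or newer device
passes, while Volta (sm_70) and Turing (sm_75) devices take the skip path.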