diff --git a/torch/csrc/jit/codegen/cuda/test/test_gpu_tensor_factories.cpp b/torch/csrc/jit/codegen/cuda/test/test_gpu_tensor_factories.cpp
index a1298e6621e0b..76c7b811db783 100644
--- a/torch/csrc/jit/codegen/cuda/test/test_gpu_tensor_factories.cpp
+++ b/torch/csrc/jit/codegen/cuda/test/test_gpu_tensor_factories.cpp
@@ -43,6 +43,9 @@ TEST_F(NVFuserTest, FusionStandaloneFull_CUDA) {
   fusion->addInput(fill_val2);
   fusion->addInput(fill_val3);
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     auto out_tv = full({size}, fill_val1, aten_to_data_type(dtype));
     fusion->addOutput(out_tv);
     out_tv = full({size, size}, fill_val2, aten_to_data_type(dtype));
@@ -57,6 +60,9 @@ TEST_F(NVFuserTest, FusionStandaloneFull_CUDA) {
   std::vector<at::Tensor> expect;
   expect.reserve(dtypes.size());
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     const auto options =
         at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
     expect.emplace_back(at::full({size}, 11, options));
@@ -94,6 +100,9 @@ TEST_F(NVFuserTest, FusionStandaloneZeros_CUDA) {
   Val* size = IrBuilder::create<Int>();
   fusion->addInput(size);
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     auto out_tv = zeros({size}, aten_to_data_type(dtype));
     fusion->addOutput(out_tv);
     out_tv = zeros({size, size}, aten_to_data_type(dtype));
@@ -108,6 +117,9 @@ TEST_F(NVFuserTest, FusionStandaloneZeros_CUDA) {
   std::vector<at::Tensor> expect;
   expect.reserve(dtypes.size());
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     const auto options =
         at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
     expect.emplace_back(at::zeros({size}, options));
@@ -145,6 +157,9 @@ TEST_F(NVFuserTest, FusionStandaloneOnes_CUDA) {
   Val* size = IrBuilder::create<Int>();
   fusion->addInput(size);
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     auto out_tv = ones({size}, aten_to_data_type(dtype));
     fusion->addOutput(out_tv);
     out_tv = ones({size, size}, aten_to_data_type(dtype));
@@ -159,6 +174,9 @@ TEST_F(NVFuserTest, FusionStandaloneOnes_CUDA) {
   std::vector<at::Tensor> expect;
   expect.reserve(dtypes.size());
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
     const auto options =
         at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
     expect.emplace_back(at::ones({size}, options));
@@ -183,6 +201,10 @@ TEST_F(NVFuserTest, FusionStandaloneARange_CUDA) {
   auto dtypes = {kFloat, kLong, kDouble};
 
   for (auto dtype : dtypes) {
+    if (!isSupportedTypeByDevice(aten_to_data_type(dtype))) {
+      continue;
+    }
+
     auto fusion = std::make_unique<Fusion>();
     FusionGuard fg(fusion.get());
 
diff --git a/torch/csrc/jit/codegen/cuda/type.cpp b/torch/csrc/jit/codegen/cuda/type.cpp
index 1f6b3d32263cb..e5f6e6878aba5 100644
--- a/torch/csrc/jit/codegen/cuda/type.cpp
+++ b/torch/csrc/jit/codegen/cuda/type.cpp
@@ -1,5 +1,7 @@
 #include <torch/csrc/jit/codegen/cuda/type.h>
 
+#include <ATen/cuda/CUDAContext.h>
+
 #include <stdexcept>
 #include <unordered_map>
 
@@ -160,6 +162,17 @@ DataType getTypeFromComplexType(DataType dtype) {
   }
 }
 
+bool isSupportedTypeByDevice(DataType dtype) {
+  auto prop = at::cuda::getCurrentDeviceProperties();
+  auto major_ver = prop->major;
+  switch (dtype) {
+    case DataType::BFloat16:
+      return major_ver >= 8;
+    default:
+      return true;
+  }
+}
+
 bool isIntegerOp(const BinaryOpType bopt) {
   return bopt >= BinaryOpType::Mod && bopt <= BinaryOpType::Rshift;
 }
diff --git a/torch/csrc/jit/codegen/cuda/type.h b/torch/csrc/jit/codegen/cuda/type.h
index ea3291e9c3544..066e1921df3c1 100644
--- a/torch/csrc/jit/codegen/cuda/type.h
+++ b/torch/csrc/jit/codegen/cuda/type.h
@@ -101,6 +101,8 @@ int getVectorSizeFromType(DataType dtype);
 DataType getTypeFromVectorType(DataType dtype);
 // Return the corresponding scalar of a complex type
 DataType getTypeFromComplexType(DataType dtype);
+// Return whether the data type is supported on the current device
+TORCH_CUDA_CU_API bool isSupportedTypeByDevice(DataType dtype);
 
 enum class ExprType {
   Invalid,
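The new isSupportedTypeByDevice helper gates DataType::BFloat16 on the device's
compute capability: it returns true only when the device reports major >= 8
(Ampere or newer), so the factory-op tests above skip bf16 on older GPUs rather
than failing at kernel-compile time. Below is a minimal standalone sketch of
the same capability check, written against the plain CUDA runtime API instead
of nvfuser's at::cuda::getCurrentDeviceProperties() path; the
deviceSupportsBFloat16 helper name is hypothetical and not part of the patch.

// Standalone sketch (hypothetical helper, plain CUDA runtime API) of the
// capability check that isSupportedTypeByDevice performs for bf16.
#include <cuda_runtime.h>
#include <cstdio>

bool deviceSupportsBFloat16(int device) {
  cudaDeviceProp prop{};
  if (cudaGetDeviceProperties(&prop, device) != cudaSuccess) {
    return false; // conservatively report "unsupported" if the query fails
  }
  // BFloat16 is fully supported starting with compute capability 8.0 (Ampere).
  return prop.major >= 8;
}

int main() {
  int device = 0;
  cudaGetDevice(&device);
  std::printf("bf16 supported on device %d: %s\n",
              device,
              deviceSupportsBFloat16(device) ? "yes" : "no");
  return 0;
}

As in the patch, only the major version is consulted: any sm_8x or newer device
passes, while Volta (sm_70) and Turing (sm_75) devices take the skip path.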