Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions aten/src/ATen/native/mps/operations/Shape.mm
Original file line number Diff line number Diff line change
Expand Up @@ -639,11 +639,19 @@ void check_shape_except_dim(const Tensor &first, const Tensor &second,

// Create placeholders
MPSGraphTensor* inputMPSGraphTensors[inputs.size()];
MPSGraphTensor* castInputMPSGraphTensors[inputs.size()];

for(int i = 0; i < inputs.size(); i++)
for(int i = 0; i < inputs.size(); i++) {
inputMPSGraphTensors[i] = mpsGraphUnrankedPlaceHolder(mpsGraph, getMPSDataType(result_type(inputs)));
if(getMPSDataType(result_type(inputs)) == MPSDataTypeBool)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this code work for other types? Did you run the consistency test to make sure the other types work correctly?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this code works for all types.
Also, I think the test I added should remain: it fails without my change, whereas testConsistency passes even without my change.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay.

castInputMPSGraphTensors[i] = [mpsGraph castTensor:inputMPSGraphTensors[i]
toType:MPSDataTypeInt32
name:[NSString stringWithFormat:@"inputTensor_%@", [NSNumber numberWithInt:i]]];
else
castInputMPSGraphTensors[i] = inputMPSGraphTensors[i];
}

auto inputTensorsArray = [NSArray arrayWithObjects:inputMPSGraphTensors
auto inputTensorsArray = [NSArray arrayWithObjects:castInputMPSGraphTensors
count:inputs.size()];
// Use concatTensors to concatenate
MPSGraphTensor* outputTensor = [mpsGraph concatTensors:inputTensorsArray
Expand All @@ -654,6 +662,10 @@ void check_shape_except_dim(const Tensor &first, const Tensor &second,

for(int i = 0; i < inputs.size(); i++)
newCachedGraph->inputMPSGraphTensors_[i] = inputMPSGraphTensors[i];
if(getMPSDataType(result_type(inputs)) == MPSDataTypeBool)
outputTensor = [mpsGraph castTensor:outputTensor
toType:MPSDataTypeBool
name:@"outputTensor"];
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
Expand Down
36 changes: 29 additions & 7 deletions test/test_mps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3390,22 +3390,44 @@ def helper(shape, padding, op):
# Test stack forward
def test_stack(self):
    """Verify that torch.stack on the MPS device matches the CPU result
    across float, integer, and bool dtypes.

    All stacked tensors must share the same shape.
    """
    def helper(shape, dtype=torch.float32):
        # Build one (cpu_tensor, mps_tensor) pair appropriate for the dtype:
        #  - bool:            random 0/1 values
        #  - other non-float: random ints in [0, 50)
        #  - float dtypes:    randn with grad enabled (covers stack's autograd path)
        def make_pair():
            if dtype == torch.bool:
                cpu_t = torch.randint(2, shape, device='cpu', dtype=dtype, requires_grad=False)
                return cpu_t, cpu_t.detach().clone().to('mps')
            if dtype not in [torch.float32, torch.bool]:
                cpu_t = torch.randint(50, shape, device='cpu', dtype=dtype, requires_grad=False)
                return cpu_t, cpu_t.detach().clone().to('mps')
            cpu_t = torch.randn(shape, device='cpu', dtype=dtype, requires_grad=True)
            return cpu_t, cpu_t.detach().clone().to('mps').requires_grad_()

        cpu_x, x = make_pair()
        cpu_y, y = make_pair()
        cpu_z, z = make_pair()

        stack = torch.stack([x, y, z], dim=1)
        stack_cpu = torch.stack([cpu_x, cpu_y, cpu_z], dim=1)

        self.assertEqual(stack, stack_cpu)

    helper([2, 8, 4, 5])
    helper([2, 8, 4, 5], dtype=torch.float16)
    helper([2, 8, 4, 5], dtype=torch.int32)
    helper([2, 8, 4, 5], dtype=torch.int64)
    helper([2, 8, 4, 5], dtype=torch.bool)
    # Empty test - Currently failing! Empty tensor not handled!
    # helper([0, 2, 4, 5])

Expand Down