
Commit 8c0bb00

abhudev and abhipathak97 authored
Make intermediate type for cumsum ScalarType::Int (#221)
* Make intermediate type for cumsum ScalarType::Int

* Disallow int64 as input for cumsum

* Fix error message; add test

Co-authored-by: abhipathak97 <abhipathak97@mps10.scv.apple.com>
1 parent dabe321 commit 8c0bb00

File tree

2 files changed: +19 −1 lines changed


aten/src/ATen/native/mps/operations/UnaryOps.mm

Lines changed: 2 additions & 1 deletion
@@ -262,11 +262,12 @@ void unary_op(const Tensor& self, const Tensor& output, std::string op_name, Una
     return;
   }
   auto input = dtype.has_value() ? self.to(dtype.value()) : self;
+  TORCH_CHECK(input.scalar_type() != ScalarType::Long, "MPS does not support cumsum op with int64 input");
   mps::unary_op(input, result, "cumsum_out_mp" + std::to_string(dim),
                 ^ MPSGraphTensor* (MPSGraph* mpsGraph, MPSGraphTensor* inputTensor) {
                   // cumsum is horribly broken for int8, int16 and as chances for overflow is pretty high, cast to int32
                   if (isIntegralType(input.scalar_type()) && input.scalar_type() != ScalarType::Int) {
-                    inputTensor = mps::castMPSTensor(mpsGraph, inputTensor, result.scalar_type());
+                    inputTensor = mps::castMPSTensor(mpsGraph, inputTensor, ScalarType::Int);
                   }
                   auto rc = [mpsGraph cumulativeSumWithTensor: inputTensor
                                                          axis: dim
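
The in-graph comment above is the core of the change: integer inputs narrower than int32 are now accumulated in ScalarType::Int rather than in the result's own dtype. A minimal CPU-side sketch of the wraparound this avoids (illustration only, not part of the commit):

import torch

# A running sum of int8 values exceeds the int8 range [-128, 127] almost
# immediately; accumulating in int32 keeps every partial sum exact.
t = torch.full((4,), 100, dtype=torch.int8)
print(t.cumsum(0, dtype=torch.int32))  # tensor([100, 200, 300, 400], dtype=torch.int32)
# A pure int8 accumulation would wrap instead: 100, -56, 44, -112.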

test/test_mps.py

Lines changed: 17 additions & 0 deletions
@@ -2165,6 +2165,23 @@ def test_from_numpy_non_contiguous(self):
         t_mps = torch.tensor(a, device="mps")
         self.assertEqual(t_cpu, t_mps.to("cpu"))
 
+    def test_cumsum_all_dtypes(self):
+        def helper(dtype):
+            t = torch.tensor([1,1,1,1], device="mps", dtype=dtype)
+            t_cpu = torch.tensor([1,1,1,1], device="cpu")
+
+            a = t.cumsum(0, dtype=dtype)
+            a_cpu = t_cpu.cumsum(0, dtype=dtype)
+
+            self.assertEqual(a.cpu(), a_cpu)
+        [helper(dtype) for dtype in [torch.int8, torch.int16, torch.int32, torch.float32]]
+
+        try:
+            helper(torch.int64)
+        except Exception as e:
+            e_string = str(e)
+            self.assertEqual(e_string, "MPS does not support cumsum op with int64 input")
+
 
 class TestLogical(TestCase):
     def _wrap_tensor(self, x, device="cpu", dtype=None, requires_grad=False):
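
For context, this is how the new guard surfaces to a user; a quick sketch assuming a build where torch.backends.mps.is_available() returns True (not part of the commit):

import torch

# int64 cumsum on the "mps" device is now rejected up front by the
# TORCH_CHECK added in UnaryOps.mm, rather than producing wrong results.
if torch.backends.mps.is_available():
    t = torch.ones(4, device="mps", dtype=torch.int64)
    try:
        t.cumsum(0)
    except RuntimeError as e:
        print(e)  # MPS does not support cumsum op with int64 input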
