Fix index_add type issue (#235)

Ronian526 · kulinseth · commit 5974b4b13a2d · 2023-02-05T14:52:53.000-08:00
* - change alphaTensor's type to use self.scalar_type()
- remove index_add from blocklist
- block bool, int16, int64, float16, and uint8 because scatterWithDataTensor gives wrong results for them

* - cast tensors so that scatterWithDataTensor operates only on float32 and int32
- throw an error when index_add is called with int64 input, which is unsupported
diff --git a/aten/src/ATen/native/mps/operations/Indexing.mm b/aten/src/ATen/native/mps/operations/Indexing.mm
@@ -503,12 +503,14 @@ Tensor flip_mps(const Tensor& self, IntArrayRef dims) {
   using namespace mps;
   MPSStream* stream = getCurrentMPSStream();
   dim = maybe_wrap_dim(dim, self.dim());
-  if (index.numel() == 0) {
+  auto numel = index.numel();
+
+  if (numel == 0) {
     return;
   }
 
-  TORCH_CHECK(source.scalar_type() != ScalarType::Long, "index_add(): Expected non int64 dtype for source.");
-  auto casted_type = isFloatingType(source.scalar_type()) ? ScalarType::Float : ScalarType::Int;
+  TORCH_CHECK(self.scalar_type() != ScalarType::Long,
+                "MPS: does not support index_add op with int64 input");
 
   struct CachedGraph : public MPSCachedGraph
   {
@@ -538,26 +540,46 @@ Tensor flip_mps(const Tensor& self, IntArrayRef dims) {
           MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
           MPSGraphTensor* indexTensor = mpsGraphRankedPlaceHolder(mpsGraph, index);
           MPSGraphTensor* sourceTensor = mpsGraphRankedPlaceHolder(mpsGraph, source);
-          MPSGraphTensor* alphaTensor = mpsGraphScalarPlaceHolder(mpsGraph, getMPSScalarType(casted_type));
-          MPSGraphTensor* castedInputTensor = inputTensor;
-          MPSGraphTensor* castedSourceTensor = sourceTensor;
-          if (source.scalar_type() != casted_type) {
-              castedInputTensor = castMPSTensor(mpsGraph, castedInputTensor, casted_type);
-              castedSourceTensor = castMPSTensor(mpsGraph, castedSourceTensor, casted_type);
+          MPSGraphTensor* alphaTensor = mpsGraphScalarPlaceHolder(mpsGraph, getMPSScalarType(self.scalar_type()));
+
+          MPSGraphTensor* castInputTensor = inputTensor;
+          MPSGraphTensor* castSourceTensor = sourceTensor;
+          MPSGraphTensor* castAlphaTensor = alphaTensor;
+
+          MPSDataType dataType = [inputTensor dataType];
+
+          // Workaround for issue #104289647 (wrong results from scatterWithDataTensor): cast to float32/int32 first
+          if (dataType != MPSDataTypeInt32 &&
+            dataType != MPSDataTypeFloat32) {
+            dataType = (dataType & MPSDataTypeFloatBit) ? MPSDataTypeFloat32 : MPSDataTypeInt32;
+            castInputTensor = [mpsGraph castTensor:inputTensor
+                                            toType:dataType
+                                            name:@"castInputTensor"];
+            castSourceTensor = [mpsGraph castTensor:sourceTensor
+                                            toType:dataType
+                                            name:@"castSourceTensor"];
+            castAlphaTensor = [mpsGraph castTensor:alphaTensor
+                                            toType:dataType
+                                            name:@"castAlphaTensor"];
           }
-          MPSGraphTensor* alphaSourceSlice = [mpsGraph multiplicationWithPrimaryTensor:castedSourceTensor
-                                                                       secondaryTensor:alphaTensor
-                                                                                  name:nil];
 
-          MPSGraphTensor* outputTensor = [mpsGraph scatterWithDataTensor:castedInputTensor
+          MPSGraphTensor* alphaSourceSlice = [mpsGraph multiplicationWithPrimaryTensor:castSourceTensor
+                                                                       secondaryTensor:castAlphaTensor
+                                                                                  name:nil];
+          MPSGraphTensor* outputTensor = [mpsGraph scatterWithDataTensor:castInputTensor
                                                             updatesTensor:alphaSourceSlice
                                                             indicesTensor:indexTensor
                                                                      axis:dim
                                                                      mode:MPSGraphScatterModeAdd
                                                                      name:nil];
-          if (source.scalar_type() != casted_type) {
-              outputTensor = castMPSTensor(mpsGraph, outputTensor, source.scalar_type());
-          }
+          dataType = [inputTensor dataType];
+          if (dataType != MPSDataTypeInt32 &&
+              dataType != MPSDataTypeFloat32) {
+              outputTensor = [mpsGraph castTensor:outputTensor
+                                            toType:[inputTensor dataType]
+                                            name:@"castOutputTensor"];
+            }
+
           newCachedGraph->inputTensor_ = inputTensor;
           newCachedGraph->indexTensor_ = indexTensor;
           newCachedGraph->sourceTensor_ = sourceTensor;
@@ -572,7 +594,7 @@ Tensor flip_mps(const Tensor& self, IntArrayRef dims) {
     Placeholder indexPlaceholder = Placeholder(cachedGraph->indexTensor_, index);
     Placeholder sourcePlaceholder = Placeholder(cachedGraph->sourceTensor_, source);
     Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, result);
-    MPSScalar alpha_scalar = getMPSScalar(alpha, casted_type);
+    MPSScalar alpha_scalar = getMPSScalar(alpha, self.scalar_type());
 
     NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
       selfPlaceholder.getMPSGraphTensor() : selfPlaceholder.getMPSGraphTensorData(),
diff --git a/test/test_mps.py b/test/test_mps.py
@@ -9238,7 +9238,8 @@ class TestConsistency(TestCase):
         'xlogy': ['b8', 'f16', 'f32', 'i16', 'i32', 'i64', 'u8'],
         'zero_': ['b8', 'f16', 'f32', 'i16', 'i32', 'i64', 'u8'],
         'zeros': ['b8', 'f16', 'f32', 'i16', 'i32', 'i64', 'u8'],
-        'zeros_like': ['b8', 'f16', 'f32', 'i16', 'i32', 'i64', 'u8']
+        'zeros_like': ['b8', 'f16', 'f32', 'i16', 'i32', 'i64', 'u8'],
+        'index_add': ['b8', 'f16', 'f32', 'i16', 'i32', 'i64', 'u8'],
     }
 
     ALLOWLIST_OP_GRAD = {
@@ -9421,7 +9422,6 @@ class TestConsistency(TestCase):
     # All the entries in this list should be removed
     BLOCKLIST = {
         # Functions that hard crash
-        'index_add': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
         'nn.functional.softplus': [torch.float32],
         'nonzero': [torch.bool, torch.uint8, torch.float16],
         'median': [torch.float32, torch.int16, torch.int32, torch.uint8, torch.int16],