Skip to content

Commit ff458f0

Browse files
authored
Dev/kulin/nll (#189)

* Fix the NLLLoss2D crash.
* Cleanup.
1 parent 42f00a5 commit ff458f0

File tree

2 files changed

+18
-12
lines changed

2 files changed

+18
-12
lines changed

aten/src/ATen/native/mps/operations/LossOps.mm

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -356,19 +356,12 @@ void nllnd_loss_backward_impl(
356356
MPSShape* weight_shape = getMPSShape(weight);
357357
MPSShape* total_weight_shape = getMPSShape(total_weight);
358358

359-
NSString* ns_shape_key = [[input_shape valueForKey:@"description"] componentsJoinedByString:@","];
360-
361359
string key = "nllnd_loss_backward_impl:" + to_string(numClasses) + ":" +
362360
to_string(ignore_index) + ":" +
363361
to_string(isWeightsArrayValid) + ":" +
364362
reductionToString(reduction) + ":" +
365-
[ns_shape_key UTF8String] + ":" +
366-
getMPSTypeString(input.scalar_type()) + ":" +
367-
getMPSTypeString(target.scalar_type()) + ":" +
368-
getMPSTypeString(weight.scalar_type()) + ":" +
369-
getMPSTypeString(total_weight.scalar_type());
363+
getTensorsStringKey({input, target, weight, total_weight});
370364
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
371-
372365
if(!cachedGraph) {
373366
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
374367

@@ -408,12 +401,11 @@ void nllnd_loss_backward_impl(
408401
}
409402

410403
float onValue = -1.0f;
404+
auto target_axis = target.defined() ? target.dim() : 1;
411405

412-
MPSGraphTensor *oneHotTensor;
413-
414-
oneHotTensor = [mpsGraph oneHotWithIndicesTensor:udpatedTargetTensor
406+
MPSGraphTensor *oneHotTensor = [mpsGraph oneHotWithIndicesTensor:udpatedTargetTensor
415407
depth:numClasses
416-
axis:1
408+
axis:target_axis
417409
dataType:inputTensor.dataType
418410
onValue:onValue
419411
offValue:0.0f

test/test_mps.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2308,6 +2308,20 @@ def test_smooth_l1_loss_reduction_mean_sum_backward(self):
23082308

23092309

23102310
class TestNLLLoss(TestCase):
2311+
def test_nll2d_loss_backward(self, device='mps'):
2312+
a = torch.randn(3, 5, requires_grad=True, device=device)
2313+
b = torch.tensor([1, 0, 4], device=device)
2314+
loss = nn.NLLLoss()
2315+
out = loss(a, b)
2316+
self.assertIsNone(out.grad_fn._saved_weight)
2317+
loss = nn.NLLLoss(weight=torch.ones((5,), device=device))
2318+
out = loss(a, b)
2319+
self.assertEqual(out.grad_fn._saved_weight, torch.ones((5,)))
2320+
2321+
out.sum().backward()
2322+
with self.assertRaisesRegex(RuntimeError, "after they have already been freed"):
2323+
out.grad_fn._saved_weight
2324+
23112325
def test_nll_loss_mismatched_batch(self, device='mps'):
23122326
x = torch.randn((10, 3), requires_grad=True, device=device)
23132327
# t should have size (10,)

0 commit comments

Comments (0)