[Relay, TOPI] Add negative log likelihood loss (nll_loss) op (apache#8056)

* add nll_loss * enrich the doc and rename parameters * update upon review * add tests * update based on reviews * update upon reviews * update upon reviews
zxy844288792 · Jan 13, 2022 · e0ed97a · e0ed97a
1 parent 420e7c6
commit e0ed97a
Show file tree

Hide file tree

Showing 15 changed files with 482 additions and 0 deletions.
diff --git a/include/tvm/relay/attrs/nn.h b/include/tvm/relay/attrs/nn.h
@@ -1424,6 +1424,19 @@ struct BatchToSpaceNDAttrs : public tvm::AttrsNode<BatchToSpaceNDAttrs> {
   }
 };  // struct BatchToSpaceNDAttrs
 
+/*!
+ * \brief Attributes used in NLLLoss operator.
+ *
+ * `reduction` selects how the per-element losses are combined and defaults to "mean";
+ * `ignore_index` is the target value whose elements are ignored and has no default here.
+ */
+struct NLLLossAttrs : public tvm::AttrsNode<NLLLossAttrs> {
+  std::string reduction;
+  int ignore_index;
+
+  TVM_DECLARE_ATTRS(NLLLossAttrs, "relay.attrs.NLLLossAttrs") {
+    // Adjacent string literals are concatenated verbatim, so the separating space
+    // has to be part of the first fragment.
+    TVM_ATTR_FIELD(reduction).set_default("mean").describe(
+        "The reduction method to apply to the output. Can be "
+        "'none', 'mean' or 'sum'.");
+    TVM_ATTR_FIELD(ignore_index).describe("The target value to ignore.");
+  }
+};  // struct NLLLossAttrs
+
 }  // namespace relay
 }  // namespace tvm
 #endif  // TVM_RELAY_ATTRS_NN_H_
diff --git a/include/tvm/topi/nn.h b/include/tvm/topi/nn.h
@@ -29,6 +29,7 @@
 #include <tvm/tir/expr.h>
 #include <tvm/tir/op.h>
 #include <tvm/topi/detail/constant_utils.h>
+#include <tvm/topi/reduction.h>
 #include <tvm/topi/tags.h>
 #include <tvm/topi/transform.h>
 
@@ -642,6 +643,53 @@ inline tvm::te::Tensor batch_to_space_nd(const tvm::te::Tensor& data,
   out = strided_slice(out, begin_idx, end_idx, strides);
   return out;
 }
+
+/*!
+ * \brief Negative log likelihood loss.
+ *
+ * For every position of `targets` the class index c = targets(n, i_1, ..., i_k) is read and
+ * the element loss is -predictions(n, c, i_1, ..., i_k) * weights(c), or 0 when c equals
+ * `ignore_index`.
+ *
+ * \param predictions The prediction tensor, indexed as (batch, class, remaining target indices).
+ * \param targets The target tensor holding class indices; its shape is the shape of the
+ *        unreduced loss.
+ * \param weights A manual rescaling weight given to each class, indexed by the class index.
+ * \param reduction The reduction method to apply to the output. "mean" divides the summed
+ *        loss by the summed weights of the non-ignored targets, "sum" returns the summed
+ *        loss, and any other value returns the unreduced loss.
+ * \param ignore_index The target value to ignore.
+ * \param name The name of the operation.
+ * \param tag The tag to mark the operation.
+ *
+ * \return The negative log likelihood loss of the predictions and targets.
+ */
+inline Tensor nll_loss(const Tensor& predictions, const Tensor& targets, const Tensor& weights,
+                       std::string reduction = "mean", int ignore_index = -100,
+                       const std::string name = "nll_loss", const std::string tag = kBroadcast) {
+  // T holds the unreduced, per-target loss and has the shape of `targets`.
+  auto T = tvm::te::compute(
+      targets->shape,
+      [&](const tvm::Array<tvm::tir::Var>& target_indices) {
+        auto c = targets(target_indices);
+        // Build the predictions index by inserting the class index right after the batch index.
+        tvm::Array<tvm::PrimExpr> pred_indices;
+        pred_indices.push_back(target_indices[0]);  // batch index
+        pred_indices.push_back(c);                  // class index
+        for (size_t i = 1; i < target_indices.size(); i++) {
+          pred_indices.push_back(target_indices[i]);  // indices for multidimensional loss
+        }
+        // Ignored targets contribute a zero of the predictions dtype.
+        return tvm::tir::Select(c != ignore_index, -predictions(pred_indices) * weights(c),
+                                tvm::tir::make_const(predictions->dtype, 0));
+      },
+      name, tag);
+  if (reduction == "mean") {
+    // W holds the weight applied at each target position (zero for ignored targets), so the
+    // mean is normalised by the total applied weight rather than by the element count.
+    auto W = tvm::te::compute(
+        targets->shape,
+        [&](const tvm::Array<tvm::tir::Var>& target_indices) {
+          auto c = targets(target_indices);
+          return tvm::tir::Select(c != ignore_index, weights(c),
+                                  tvm::tir::make_const(predictions->dtype, 0));
+        },
+        name, tag);
+    // NOTE(review): if every target equals ignore_index the denominator is a sum of zeros.
+    return topi::divide(topi::sum(T, {}), topi::sum(W, {}));
+  } else if (reduction == "sum") {
+    return topi::sum(T, {});
+  } else {  // reduction == "none"
+    // Any string other than "mean" or "sum" also ends up here; it is not validated.
+    return T;
+  }
+}
 }  // namespace topi
 }  // namespace tvm
 #endif  // TVM_TOPI_NN_H_
diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py
@@ -2320,6 +2320,20 @@ def unique(self, inputs, input_types):
             unique_sliced = _op.strided_slice(unique, begin=[0], end=num_uniq, slice_mode="size")
             return (unique_sliced, inverse_indices)
 
+    def nll_loss(self, inputs, input_types):
+        """Convert an nll_loss node into a call to `_op.nn.nll_loss`.
+
+        `inputs` must hold exactly five entries, unpacked in order as
+        predictions, targets, weights, reduction and ignore_index.
+        """
+        assert len(inputs) == 5
+        [predictions, targets, weights, reduction, ignore_index] = inputs
+        # The class count is read from dimension 1 of the inferred predictions shape.
+        num_class = self.infer_shape(predictions)[1]
+        # Translate the integer reduction code into the string the Relay op takes:
+        # 0 -> "none", 1 -> "mean", anything else -> "sum".
+        if reduction == 0:
+            reduction = "none"
+        elif reduction == 1:
+            reduction = "mean"
+        else:
+            reduction = "sum"
+        # With no weights supplied, substitute ones (one entry per class) in the
+        # dtype named by input_types[0].
+        if weights is None:
+            weights = _op.full(_expr.const(1), (num_class,), dtype=input_types[0])
+        return _op.nn.nll_loss(predictions, targets, weights, reduction, ignore_index)
+
     # Operator mappings
     def create_convert_map(self):
         self.convert_map = {
@@ -2532,6 +2546,8 @@ def create_convert_map(self):
             "aten::argsort": self.argsort,
             "aten::sort": self.sort,
             "aten::_unique2": self.unique,
+            "aten::nll_loss": self.nll_loss,
+            "aten::nll_loss2d": self.nll_loss,
         }
 
     def update_convert_map(self, custom_map):

diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py
@@ -886,6 +886,17 @@ def compute_cross_entropy_with_logits(attrs, inputs, out_dtype):
 reg.register_pattern("nn.cross_entropy_with_logits", OpPattern.OPAQUE)
 
 
+# nll_loss
+@reg.register_compute("nn.nll_loss")
+def compute_nll_loss(attrs, inputs, out_dtype):
+    """Compute definition of nn.nll_loss.
+
+    Unpacks the three inputs (predictions, targets, weights) and forwards them,
+    together with `attrs.reduction` and `attrs.ignore_index`, to
+    `topi.nn.nll_loss`. `out_dtype` is not used. The result is returned as a
+    one-element list.
+    """
+    predictions, targets, weights = inputs
+    return [topi.nn.nll_loss(predictions, targets, weights, attrs.reduction, attrs.ignore_index)]
+
+
+reg.register_reduce_schedule("nn.nll_loss")
+reg.register_pattern("nn.nll_loss", OpPattern.OUT_ELEMWISE_FUSABLE)
+
+
 # depth_to_space
 @reg.register_compute("nn.depth_to_space")
 def compute_depth_to_space(attrs, inputs, out_dtype):

diff --git a/python/tvm/relay/op/nn/nn.py b/python/tvm/relay/op/nn/nn.py
@@ -2974,6 +2974,42 @@ def cross_entropy_with_logits(predictions, targets):
     return _make.cross_entropy_with_logits(predictions, targets)
 
 
+def nll_loss(predictions, targets, weights, reduction="mean", ignore_index=-100):
+    """Negative log likelihood loss.
+
+    output{n, i_1, i_2, ..., i_k} = -p * w
+      where t = targets{n, i_1, i_2, ..., i_k}
+            p = predictions{n, t, i_1, i_2, ..., i_k}
+            w = weights{t} if t != ignore_index else 0
+
+    result = reduction(output)
+      where "mean" is the weighted mean sum(output) / sum(w),
+      "sum" is sum(output) and "none" leaves output unreduced.
+
+    Parameters
+    ----------
+    predictions : tvm.relay.Expr
+      The predictions.
+
+    targets : tvm.relay.Expr
+      The target value of each prediction.
+
+    weights : tvm.relay.Expr
+      The weight of each target value.
+
+    reduction : string
+      The reduction method to apply to the output.
+      Possible values are "mean", "sum" and "none".
+
+    ignore_index : int
+      The target value to ignore.
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+      The computed result.
+    """
+    return _make.nll_loss(predictions, targets, weights, reduction, ignore_index)
+
+
 def depth_to_space(data, block_size, layout="NCHW", mode="DCR"):
     """Convert channels into spatial blocks.
 

diff --git a/python/tvm/relay/op/op_attrs.py b/python/tvm/relay/op/op_attrs.py
@@ -572,3 +572,8 @@ class ThreefryGenerateAttrs(Attrs):
 @tvm._ffi.register_object("relay.attrs.UniformAttrs")
 class UniformAttrs(Attrs):
     """Attributes used in UniformAttrs operators"""
+
+
+@tvm._ffi.register_object("relay.attrs.NLLLossAttrs")
+class NLLLossAttrs(Attrs):
+    """Attributes for nn.nll_loss (registered as "relay.attrs.NLLLossAttrs")."""
diff --git a/python/tvm/topi/nn/__init__.py b/python/tvm/topi/nn/__init__.py
@@ -49,3 +49,4 @@
 from .space_to_depth import *
 from .space_to_batch_nd import *
 from .batch_to_space_nd import *
+from .loss import *
diff --git a/python/tvm/topi/nn/loss.py b/python/tvm/topi/nn/loss.py
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name,unused-argument
+"""Loss functions definitions."""
+from __future__ import absolute_import
+from . import cpp
+
+
+def nll_loss(predictions, targets, weights, reduction, ignore_index):
+    """Negative log likelihood loss on the input data.
+
+    output{n, i_1, i_2, ..., i_k} = -p * w
+      where t = targets{n, i_1, i_2, ..., i_k}
+            p = predictions{n, t, i_1, i_2, ..., i_k}
+            w = weights{t} if t != ignore_index else 0
+
+    result = reduction(output)
+      where "mean" is the weighted mean sum(output) / sum(w),
+      "sum" is sum(output) and "none" leaves output unreduced.
+
+    Parameters
+    ----------
+    predictions : tvm.te.Tensor
+        (k+2)-D with shape (N, C, d_1, d_2, ..., d_k),
+        where C is the number of target classes
+
+    targets : tvm.te.Tensor
+        (k+1)-D with shape (N, d_1, d_2, ..., d_k)
+        The target value of the input.
+
+    weights : tvm.te.Tensor
+        1-D with shape (C,)
+        The weight of each target value.
+
+    reduction : string
+        The reduction method to apply to output.
+        Can be "mean", "sum" or "none".
+
+    ignore_index : int
+        The target value to ignore.
+
+    Returns
+    -------
+    output : tvm.te.Tensor
+        a scalar if the reduction type is "mean" or "sum",
+        otherwise the same shape as `targets`.
+    """
+    return cpp.nn.nll_loss(predictions, targets, weights, reduction, ignore_index)
diff --git a/python/tvm/topi/testing/__init__.py b/python/tvm/topi/testing/__init__.py
@@ -69,3 +69,4 @@
 from .matrix_set_diag import matrix_set_diag
 from .space_to_batch_nd import space_to_batch_nd_python
 from .batch_to_space_nd import batch_to_space_nd_python
+from .nll_loss import nll_loss
diff --git a/python/tvm/topi/testing/nll_loss.py b/python/tvm/topi/testing/nll_loss.py
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name
+"""NLLLoss in python"""
+import numpy as np
+
+
+def nll_loss(predictions, targets, weights, reduction="mean", ignore_index=-100):
+    """nll_loss operator implemented in numpy.
+
+    output{n, i_1, i_2, ..., i_k} = -p * w
+      where t = targets{n, i_1, i_2, ..., i_k}
+            p = predictions{n, t, i_1, i_2, ..., i_k}
+            w = weights{t} if t != ignore_index else 0
+
+    result = reduction(output)
+      where "mean" is the weighted mean sum(output) / sum(w),
+      "sum" is sum(output) and any other value leaves output unreduced.
+
+    Parameters
+    ----------
+    predictions : numpy.ndarray
+        (k+2)-D with shape (N, C, d_1, d_2, ..., d_k),
+        where C is the number of target classes
+
+    targets : numpy.ndarray
+        (k+1)-D with shape (N, d_1, d_2, ..., d_k)
+        The target value of the input.
+
+    weights : numpy.ndarray
+        1-D with shape (C,)
+        The weight of each target value.
+
+    reduction : string
+        The reduction method to apply to output.
+        Can be "mean", "sum" or "none".
+
+    ignore_index : int
+        The target value to ignore.
+
+    Returns
+    -------
+    output : numpy.ndarray
+        a scalar if the reduction type is "mean" or "sum",
+        otherwise the same shape as `targets`.
+    """
+    # np.zeros is called without a dtype, so the result does not inherit the
+    # dtype of `predictions`; ignored positions simply keep their initial zero.
+    res = np.zeros(targets.shape)
+    weight_sum = 0.0
+    for index in np.ndindex(targets.shape):
+        class_id = targets[index]
+        if class_id != ignore_index:
+            index_list = list(index)
+            # Insert the class index right after the batch index.
+            pred_index = tuple(index_list[:1] + [class_id] + index_list[1:])
+            res[index] = -predictions[pred_index] * weights[class_id]
+            weight_sum += weights[class_id]
+    if reduction == "mean":
+        # NOTE(review): if every target equals ignore_index, weight_sum is still
+        # 0.0 here and this is a division of zero by zero.
+        return np.sum(res) / weight_sum
+    if reduction == "sum":
+        return np.sum(res)
+    return res