Merge branch 'tensorflow:master' into master

BlackPeter13 · web-flow · commit 4be24b03df16 · 2023-01-22T23:43:12.000+01:00
diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc
@@ -2145,6 +2145,7 @@ absl::flat_hash_set<string> GetKnownXLAAllowlistOp() {
       "Roll",
       "ScatterNd",
       "SegmentSumV2",
+      "SegmentProdV2",
       "SelfAdjointEigV2",
       "SoftmaxCrossEntropyWithLogits",
       "SpaceToBatch",
diff --git a/tensorflow/compiler/mlir/g3doc/_includes/tf_passes.md b/tensorflow/compiler/mlir/g3doc/_includes/tf_passes.md
@@ -1116,6 +1116,18 @@ tf_device.replicate([%0, %1] as %ri: tensor<*x!tf_type.resource>) {n = 2 : i32}
   tf_device.return
 }
 ```
+### `-tf-replicate-tensor-list-init-ops`: Replicate TensorList init ops for correct shape assignments in shape inference
+If we pass the same TensorList to a while op as multiple arguments, or just use
+the same TensorList at multiple places and assign different
+TensorListSetItem to elements of the TensorList, shape inference is then
+unable to identify the shape of these args, and thus the input TensorList
+shape is unidentifiable.
+All of these args are supposed to be independent and not related to original
+creation of TensorList.
+
+This pass will create multiple instances of TensorList for each arg of the
+while op and each use, and thus there will be no conflict in resolving the
+shape of these different inputs.
 ### `-tf-replicate-to-island`: Lowers device replicate to executor islands
 
 #### Options
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
@@ -15241,6 +15241,64 @@ has size `k`, the number of segments.}]>:$output
   TF_DerivedOperandTypeAttr Tindices = TF_DerivedOperandTypeAttr<1>;
 }
 
+def TF_SegmentProdV2Op : TF_Op<"SegmentProdV2", [Pure]> {
+  let summary = "Computes the product along segments of a tensor.";
+
+  let description = [{
+Read
+[the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation)
+for an explanation of segments.
+
+Computes a tensor such that
+\\(output_i = \prod_j data_j\\) where the product is over `j` such
+that `segment_ids[j] == i`.
+
+If the product is empty for a given segment ID `i`, `output[i] = 1`.
+
+Note that this op is currently only supported with jit_compile=True.
+
+The only difference from SegmentProd is the additional input `num_segments`.
+This helps in evaluating the output shape at compile time.
+`num_segments` should be consistent with segment_ids,
+e.g. Max(segment_ids) should be equal to `num_segments` - 1 for a 1-d segment_ids.
+With an inconsistent num_segments, the op still runs; the only difference is that
+the output takes the size of num_segments irrespective of the size of segment_ids and data.
+For num_segments less than the expected output size, the last elements are ignored.
+For num_segments more than the expected output size, the last elements are assigned 1.
+
+For example:
+
+>>> @tf.function(jit_compile=True)
+... def test(c):
+...   return tf.raw_ops.SegmentProdV2(data=c, segment_ids=tf.constant([0, 0, 1]), num_segments=2)
+>>> c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]])
+>>> test(c).numpy()
+array([[4, 6, 6, 4],
+       [5, 6, 7, 8]], dtype=int32)
+  }];
+
+  let arguments = (ins
+    TF_NumberTensor:$data,
+    Arg<TF_I32OrI64Tensor, [{A 1-D tensor whose size is equal to the size of `data`'s
+first dimension.  Values should be sorted and can be repeated.
+The values must be less than `num_segments`.
+
+Caution: The values are always validated to be sorted on CPU, never validated
+on GPU.}]>:$segment_ids,
+    TF_I32OrI64Tensor:$num_segments
+  );
+
+  let results = (outs
+    Res<TF_NumberTensor, [{Has same shape as data, except for the first `segment_ids.rank`
+dimensions, which are replaced with a single dimension which has size
+`num_segments`.}]>:$output
+  );
+
+  TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>;
+  TF_DerivedOperandTypeAttr Tindices = TF_DerivedOperandTypeAttr<1>;
+  TF_DerivedOperandTypeAttr Tnumsegments = TF_DerivedOperandTypeAttr<2>;
+}
+
 def TF_SegmentSumOp : TF_Op<"SegmentSum", [Pure]> {
   let summary = "Computes the sum along segments of a tensor.";
 
diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc
@@ -232,6 +232,7 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) {
             TypeID::get<TF::RollOp>(),
             TypeID::get<TF::RoundOp>(),
             TypeID::get<TF::SegmentSumV2Op>(),
+            TypeID::get<TF::SegmentProdV2Op>(),
             TypeID::get<TF::SelectV2Op>(),
             TypeID::get<TF::SelfAdjointEigV2Op>(),
             TypeID::get<TF::SeluGradOp>(),
diff --git a/tensorflow/compiler/tests/segment_reduction_ops_test.py b/tensorflow/compiler/tests/segment_reduction_ops_test.py
@@ -45,6 +45,10 @@ def _segmentSumV2(self, data, indices, num_segments):
     return self._segmentReduction(math_ops.segment_sum_v2, data, indices,
                                   num_segments)
 
+  def _segmentProdV2(self, data, indices, num_segments):
+    return self._segmentReduction(math_ops.segment_prod_v2, data, indices,
+                                  num_segments)
+
   def _unsortedSegmentProd(self, data, indices, num_segments):
     return self._segmentReduction(math_ops.unsorted_segment_prod, data, indices,
                                   num_segments)
@@ -65,6 +69,30 @@ def testSegmentSum(self):
               np.array([0, 1, 2, 3, 4, 5], dtype=dtype),
               np.array([0, 0, 2, 3, 3, 3], dtype=np.int32), 4))
 
+  def testSegmentProd(self):
+    for dtype in self.numeric_types:
+      self.assertAllClose(
+          np.array([0, 1, 2, 60], dtype=dtype),
+          self._segmentProdV2(
+              np.array([0, 1, 2, 3, 4, 5], dtype=dtype),
+              np.array([0, 0, 2, 3, 3, 3], dtype=np.int32), 4))
+
+  def testSegmentProdNumSegmentsLess(self):
+    for dtype in self.numeric_types:
+      self.assertAllClose(
+          np.array([0, 1, 2], dtype=dtype),
+          self._segmentProdV2(
+              np.array([0, 1, 2, 3, 4, 5], dtype=dtype),
+              np.array([0, 0, 2, 3, 3, 3], dtype=np.int32), 3))
+
+  def testSegmentProdNumSegmentsMore(self):
+    for dtype in self.numeric_types:
+      self.assertAllClose(
+          np.array([0, 1, 2, 60, 1], dtype=dtype),
+          self._segmentProdV2(
+              np.array([0, 1, 2, 3, 4, 5], dtype=dtype),
+              np.array([0, 0, 2, 3, 3, 3], dtype=np.int32), 5))
+
   def testUnsortedSegmentSum0DIndices1DData(self):
     for dtype in self.numeric_types:
       self.assertAllClose(
diff --git a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc
@@ -150,10 +150,11 @@ REGISTER_XLA_OP(
     Name("UnsortedSegmentSum").CompileTimeConstantInput("num_segments"),
     SegmentSum</*indices_are_sorted=*/false>);
 
-class UnsortedSegmentProd : public SegmentReduce {
+template <bool indices_are_sorted>
+class SegmentProd : public SegmentReduce {
  public:
-  explicit UnsortedSegmentProd(OpKernelConstruction* ctx)
-      : SegmentReduce(ctx, /*indices_are_sorted=*/false) {}
+  explicit SegmentProd(OpKernelConstruction* ctx)
+      : SegmentReduce(ctx, indices_are_sorted) {}
 
   xla::XlaOp InitialValue(xla::XlaBuilder* builder) override {
     return xla::One(builder, type_);
@@ -163,7 +164,9 @@ class UnsortedSegmentProd : public SegmentReduce {
 
 REGISTER_XLA_OP(
     Name("UnsortedSegmentProd").CompileTimeConstantInput("num_segments"),
-    UnsortedSegmentProd);
+    SegmentProd</*indices_are_sorted=*/false>);
+REGISTER_XLA_OP(Name("SegmentProdV2").CompileTimeConstantInput("num_segments"),
+                SegmentProd</*indices_are_sorted=*/true>);
 
 class UnsortedSegmentMin : public SegmentReduce {
  public:
diff --git a/tensorflow/core/api_def/base_api/api_def_SegmentProdV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_SegmentProdV2.pbtxt
@@ -0,0 +1,56 @@
+op {
+  graph_op_name: "SegmentProdV2"
+  in_arg {
+    name: "segment_ids"
+    description: <<END
+A 1-D tensor whose size is equal to the size of `data`'s
+first dimension.  Values should be sorted and can be repeated.
+The values must be less than `num_segments`.
+
+Caution: The values are always validated to be sorted on CPU, never validated
+on GPU.
+END
+  }
+  out_arg {
+    name: "output"
+    description: <<END
+Has same shape as data, except for the first `segment_ids.rank`
+dimensions, which are replaced with a single dimension which has size
+`num_segments`.
+END
+  }
+  summary: "Computes the product along segments of a tensor."
+  description: <<END
+Read
+[the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation)
+for an explanation of segments.
+
+Computes a tensor such that
+\\(output_i = \prod_j data_j\\) where the product is over `j` such
+that `segment_ids[j] == i`.
+
+If the product is empty for a given segment ID `i`, `output[i] = 1`.
+
+Note that this op is currently only supported with jit_compile=True.
+
+The only difference from SegmentProd is the additional input `num_segments`.
+This helps in evaluating the output shape at compile time.
+`num_segments` should be consistent with segment_ids,
+e.g. Max(segment_ids) should be equal to `num_segments` - 1 for a 1-d segment_ids.
+With an inconsistent num_segments, the op still runs; the only difference is that
+the output takes the size of num_segments irrespective of the size of segment_ids and data.
+For num_segments less than the expected output size, the last elements are ignored.
+For num_segments more than the expected output size, the last elements are assigned 1.
+
+For example:
+
+>>> @tf.function(jit_compile=True)
+... def test(c):
+...   return tf.raw_ops.SegmentProdV2(data=c, segment_ids=tf.constant([0, 0, 1]), num_segments=2)
+>>> c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]])
+>>> test(c).numpy()
+array([[4, 6, 6, 4],
+       [5, 6, 7, 8]], dtype=int32)
+
+END
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_SegmentProdV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_SegmentProdV2.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "SegmentProdV2"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/ops/compat/ops_history_v2/SegmentProdV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/SegmentProdV2.pbtxt
@@ -0,0 +1,69 @@
+op 	 {
+  name: "SegmentProdV2"
+  input_arg {
+    name: "data"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "segment_ids"
+    type_attr: "Tindices"
+  }
+  input_arg {
+    name: "num_segments"
+    type_attr: "Tnumsegments"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+        type: DT_DOUBLE
+        type: DT_INT32
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_INT8
+        type: DT_COMPLEX64
+        type: DT_INT64
+        type: DT_QINT8
+        type: DT_QUINT8
+        type: DT_QINT32
+        type: DT_BFLOAT16
+        type: DT_QINT16
+        type: DT_QUINT16
+        type: DT_UINT16
+        type: DT_COMPLEX128
+        type: DT_HALF
+        type: DT_UINT32
+        type: DT_UINT64
+      }
+    }
+  }
+  attr {
+    name: "Tindices"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+  attr {
+    name: "Tnumsegments"
+    type: "type"
+    default_value {
+      type: DT_INT32
+    }
+    allowed_values {
+      list {
+        type: DT_INT32
+        type: DT_INT64
+      }
+    }
+  }
+}
diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <limits>
+#include <vector>
 
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/numeric_op.h"
@@ -1278,7 +1280,7 @@ REGISTER_OP("SegmentSum")
     .Attr("Tindices: {int32,int64}")
     .SetShapeFn(SegmentReductionShapeFn);
 
-// TODO(hinsu): Introduce Segment{Prod,Min,Max}V2 ops, similarly.
+// TODO(hinsu): Introduce Segment{Min,Max}V2 ops, similarly.
 REGISTER_OP("SegmentSumV2")
     .Input("data: T")
     .Input("segment_ids: Tindices")
@@ -1305,6 +1307,16 @@ REGISTER_OP("SegmentProd")
     .Attr("Tindices: {int32,int64}")
     .SetShapeFn(SegmentReductionShapeFn);
 
+REGISTER_OP("SegmentProdV2")
+    .Input("data: T")
+    .Input("segment_ids: Tindices")
+    .Input("num_segments: Tnumsegments")
+    .Output("output: T")
+    .Attr("T: numbertype")
+    .Attr("Tindices: {int32,int64}")
+    .Attr("Tnumsegments: {int32,int64} = DT_INT32")
+    .SetShapeFn(shape_inference::SegmentReductionWithNumSegmentsShapeFn);
+
 REGISTER_OP("SegmentMin")
     .Input("data: T")
     .Input("segment_ids: Tindices")
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
diff --git a/tensorflow/lite/delegates/gpu/delegate.cc b/tensorflow/lite/delegates/gpu/delegate.cc
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +op {
 +  graph_op_name: "SegmentProdV2"
 +  visibility: HIDDEN
 +}