PaddlePaddle · zhoutianzi666 · Nov 28, 2023 · Nov 24, 2023 · Nov 24, 2023 · Nov 25, 2023
diff --git a/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc b/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc
@@ -829,6 +829,22 @@ nvinfer1::DimsExprs PadInferMeta(
   return output;
 }
 
+// Scatter infer-meta: requires 3 inputs; output dims mirror inputs[0].
+nvinfer1::DimsExprs ScatterInferMeta(
+    int output_index,
+    const nvinfer1::DimsExprs* inputs,
+    int nb_inputs,
+    nvinfer1::IExprBuilder& expr_builder,  // NOLINT
+    const framework::OpDesc& op_desc) {
+  PADDLE_ENFORCE_EQ(
+      nb_inputs,
+      3,
+      phi::errors::InvalidArgument(
+          "inputs of scatter should be equal to 3, But received (%s)",
+          nb_inputs));
+  return inputs[0];
+}
+
 PD_REGISTER_DYNAMIC_INFER_META_FN(gather_nd, GatherNdInferMeta);
 PD_REGISTER_DYNAMIC_INFER_META_FN(yolo_box, YoloBoxInferMeta);
 PD_REGISTER_DYNAMIC_INFER_META_FN(instance_norm, InstanceNormInferMeta);
@@ -845,6 +861,7 @@ PD_REGISTER_DYNAMIC_INFER_META_FN(p_norm, PNormInferMeta);
 PD_REGISTER_DYNAMIC_INFER_META_FN(memory_efficient_attention,
                                   MemoryEfficientAttentionInferMeta);
 PD_REGISTER_DYNAMIC_INFER_META_FN(pad, PadInferMeta);
+PD_REGISTER_DYNAMIC_INFER_META_FN(scatter, ScatterInferMeta);
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta_registry.h b/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta_registry.h
@@ -34,6 +34,7 @@ USE_TRT_DYNAMIC_INFER_META_FN(conv2d_transpose);
 USE_TRT_DYNAMIC_INFER_META_FN(memory_efficient_attention);
 USE_TRT_DYNAMIC_INFER_META_FN(p_norm);
 USE_TRT_DYNAMIC_INFER_META_FN(pad);
+USE_TRT_DYNAMIC_INFER_META_FN(scatter);
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu
@@ -403,6 +403,21 @@ bool GenericPlugin::supportsFormatCombination(
     if (pos == 2)
       return in_out[1].type == in_out[pos].type &&
              in_out[1].format == in_out[pos].format;
+  } else if (op_desc_.Type() == "scatter") {
+    // input X
+    if (pos == 0)
+      return (in_out[pos].type == nvinfer1::DataType::kFLOAT ||
+              (isFp16Supported() &&
+               in_out[pos].type == nvinfer1::DataType::kHALF)) &&
+             (in_out[pos].format == nvinfer1::TensorFormat::kLINEAR);
+    // Ids
+    if (pos == 1)
+      return (in_out[pos].type == nvinfer1::DataType::kINT32) &&
+             (in_out[pos].format == nvinfer1::TensorFormat::kLINEAR);
+    // 3:output 2:input Updates
+    if (pos == 3 || pos == 2)
+      return in_out[0].type == in_out[pos].type &&
+             in_out[0].format == in_out[pos].format;
   } else {
     return (in_out[pos].type == nvinfer1::DataType::kFLOAT ||
             (isFp16Supported() &&
@@ -563,9 +578,7 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
 
     int input_numel = 1;
     for (int k = 0; k < input_shape.size(); k++) input_numel *= input_shape[k];
-
     auto data_type_and_size = nvType2PhiType(input_desc[i].type);
-
     phi::DenseTensorMeta input_meta(data_type_and_size.first,
                                     phi::make_ddim(input_shape));
     std::shared_ptr<phi::Allocation> input_alloc(
@@ -606,9 +619,7 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
 
   CHECK_EQ(phi_kernel_contexts_[data_type]->InputsSize(), getNbInputs());
   CHECK_EQ(phi_kernel_contexts_[data_type]->OutputsSize(), getNbOutputs());
-
   (*phi_kernels_[data_type])(phi_kernel_contexts_[data_type].get());
-
   return cudaGetLastError() != cudaSuccess;
 }
 

diff --git a/paddle/phi/kernels/funcs/scatter.cu.h b/paddle/phi/kernels/funcs/scatter.cu.h
@@ -158,7 +158,6 @@ void GPUScatterAssign(const phi::GPUContext& ctx,
   } else {
     for (int i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i];
   }
-
   const T* p_src = src.data<T>();
   const IndexT* p_index = index.data<IndexT>();
   T* p_output = output->data<T>();

diff --git a/test/ir/inference/test_trt_convert_scatter.py b/test/ir/inference/test_trt_convert_scatter.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from functools import partial
+from typing import List
+
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+
+import paddle.inference as paddle_infer
+
+
+class TrtConvertScatter(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1():
+            return np.random.random([6]).astype(np.float32)
+
+        def generate_input2():
+            return np.random.random([4, 1]).astype(np.int32)
+
+        def generate_input3():
+            return np.random.random([4]).astype(np.float32)
+
+        for overwrite in [False, True]:
+            ops_config = [
+                {
+                    "op_type": "scatter",
+                    "op_inputs": {
+                        "X": ["input_data"],
+                        "Ids": ["index_data"],
+                        "Updates": ["update_data"],
+                    },
+                    "op_outputs": {"Out": ["output_data"]},
+                    "op_attrs": {"overwrite": overwrite},
+                }
+            ]
+            ops = self.generate_op_config(ops_config)
+            program_config = ProgramConfig(
+                ops=ops,
+                weights={},
+                inputs={
+                    "input_data": TensorConfig(
+                        data_gen=partial(generate_input1)
+                    ),
+                    "index_data": TensorConfig(
+                        data_gen=partial(generate_input2)
+                    ),
+                    "update_data": TensorConfig(
+                        data_gen=partial(generate_input3)
+                    ),
+                },
+                outputs=["output_data"],
+            )
+
+            yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1],
+                "index_data": [2, 1],
+                "update_data": [1],
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [6],
+                "index_data": [4, 1],
+                "update_data": [4],
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [6],
+                "index_data": [4, 1],
+                "update_data": [4],
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        # clear_dynamic_shape()
+        # self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        # program_config.set_input_type(np.float32)
+        # yield self.create_inference_config(), (0, 5), 1e-5
+        # self.trt_param.precision = paddle_infer.PrecisionType.Half
+        # program_config.set_input_type(np.float16)
+        # yield self.create_inference_config(), (0, 5), 1e-3
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        program_config.set_input_type(np.float32)
+        yield self.create_inference_config(), (1, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        program_config.set_input_type(np.float16)
+        yield self.create_inference_config(), (1, 4), 1e-3
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()  # run the test case when this file is executed directly